//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "AArch64ISelLowering.h"
#include "AArch64ExpandImm.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "aarch64-lower"

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");

// FIXME: The necessary dtprel relocations don't seem to be supported
// well in the GNU bfd and gold linkers at the moment. Therefore, by
// default, for now, fall back to GeneralDynamic code generation.
static cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
    "aarch64-elf-ldtls-generation", cl::Hidden,
    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
    cl::init(false));

static cl::opt<bool>
    EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
                             cl::desc("Enable AArch64 logical imm instruction "
                                      "optimization"),
                             cl::init(true));

// Temporary option added for the purpose of testing functionality added
// to DAGCombiner.cpp in D92230. It is expected that this can be removed in
// the future when both implementations are based on MGATHER rather
// than the GLD1 nodes added for the SVE gather load intrinsics.
static cl::opt<bool>
    EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine",
                                   cl::Hidden,
                                   cl::desc("Combine extends of AArch64 masked "
                                            "gather intrinsics"),
                                   cl::init(true));

/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;

static inline EVT getPackedSVEVectorVT(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for vector");
  case MVT::i8:
    return MVT::nxv16i8;
  case MVT::i16:
    return MVT::nxv8i16;
  case MVT::i32:
    return MVT::nxv4i32;
  case MVT::i64:
    return MVT::nxv2i64;
  case MVT::f16:
    return MVT::nxv8f16;
  case MVT::f32:
    return MVT::nxv4f32;
  case MVT::f64:
    return MVT::nxv2f64;
  case MVT::bf16:
    return MVT::nxv8bf16;
  }
}

// NOTE: Currently there's only a need to return integer vector types. If this
// changes then just add an extra "type" parameter.
static inline EVT getPackedSVEVectorVT(ElementCount EC) {
  switch (EC.getKnownMinValue()) {
  default:
    llvm_unreachable("unexpected element count for vector");
  case 16:
    return MVT::nxv16i8;
  case 8:
    return MVT::nxv8i16;
  case 4:
    return MVT::nxv4i32;
  case 2:
    return MVT::nxv2i64;
  }
}

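/// Returns the integer vector type an SVE predicate vector is promoted to,
/// keyed by element count: e.g. nxv4i1 promotes to nxv4i32 and nxv16i1 to
/// nxv16i8 (the mapping mirrors the switch below).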
static inline EVT getPromotedVTForPredicate(EVT VT) {
  assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
         "Expected scalable predicate vector type!");
  switch (VT.getVectorMinNumElements()) {
  default:
    llvm_unreachable("unexpected element count for vector");
  case 2:
    return MVT::nxv2i64;
  case 4:
    return MVT::nxv4i32;
  case 8:
    return MVT::nxv8i16;
  case 16:
    return MVT::nxv16i8;
  }
}

/// Returns true if VT's elements occupy the lowest bit positions of its
/// associated register class without any intervening space.
///
/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
/// same register class, but only nxv8f16 can be treated as a packed vector.
static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
  assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal vector type!");
  return VT.isFixedLengthVector() ||
         VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
}

// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
// predicate and end with a passthru value matching the result type.
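// That is, for a node (op pg, ..., passthru), lanes left inactive by the
// predicate pg take their value from the trailing passthru operand.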
static bool isMergePassthruOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  // (the individual AArch64ISD ###_MERGE_PASSTHRU case labels are elided)
    return true;
  }
}

AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                                             const AArch64Subtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
  // we have to make something up. Arbitrarily, choose ZeroOrOne.
  setBooleanContents(ZeroOrOneBooleanContent);
  // When comparing vectors the result sets the different elements in the
  // vector to all-one or all-zero.
  setBooleanVectorContents(ZeroAndOneBooleanContent);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
  addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);

  if (Subtarget->hasLS64()) {
    addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
  }

  if (Subtarget->hasFPARMv8()) {
    addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
    addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
    addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
    addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
    addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
  }

  if (Subtarget->hasNEON()) {
    addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
    addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
    // Someone set us up the NEON.
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);
    addDRTypeForNEON(MVT::v1f64);
    addDRTypeForNEON(MVT::v4f16);
    if (Subtarget->hasBF16())
      addDRTypeForNEON(MVT::v4bf16);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);
    addQRTypeForNEON(MVT::v8f16);
    if (Subtarget->hasBF16())
      addQRTypeForNEON(MVT::v8bf16);
  }

  if (Subtarget->hasSVE()) {
    // Add legal SVE predicate types.
    addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
    addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
    addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
    addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);

    // Add legal SVE data types.
    addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
    addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
    addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
    addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);

    addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
    addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
    addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
    addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
    addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
    addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);

    if (Subtarget->hasBF16()) {
      addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
      addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
      addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
    }

    if (Subtarget->useSVEForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useSVEForFixedLengthVectorVT(VT))
          addRegisterClass(VT, &AArch64::ZPRRegClass);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useSVEForFixedLengthVectorVT(VT))
          addRegisterClass(VT, &AArch64::ZPRRegClass);
    }

    for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
      // (per-type operation actions elided)
    }

    // (another type loop elided)
    for (auto VT :
         { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
           MVT::nxv2f64 }) {
      // (per-type operation actions elided)
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Provide all sorts of operation actions.
  // (a large block of setOperationAction calls elided)

  // Custom lowering hooks are needed for XOR
  // to fold it into CSINC/CSINV.
  setOperationAction(ISD::XOR, MVT::i32, Custom);
  setOperationAction(ISD::XOR, MVT::i64, Custom);

  // Virtually no operation on f128 is legal, but LLVM can't expand them when
  // there's a valid register class, so we need custom operations in most cases.
  // (f128 operation actions elided)

  // Lowering for many of the conversions is actually specified by the non-f128
  // type. The LowerXXX function will be trivial when f128 isn't involved.
  // (conversion operation actions elided)

  // Variable arguments.
  // (vararg operation actions elided)

  // Variable-sized objects.
  if (Subtarget->isTargetWindows())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Constant pool entries.
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);

  // BlockAddress.
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);

  // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
  // (overflow operation actions elided)

  // AArch64 lacks both left-rotate and popcount instructions.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::ROTL, MVT::i64, Expand);
  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
  }

  // AArch64 doesn't have i32 MULH{S|U}.
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  setOperationAction(ISD::MULHS, MVT::i32, Expand);

  // AArch64 doesn't have {U|S}MUL_LOHI.
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);

  // (elided)
  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    // (elided)
  }

  // Custom lower Add/Sub/Mul with overflow.
  // (overflow operation actions elided)

  if (Subtarget->hasFullFP16())
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
  else
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);

  if (!Subtarget->hasFullFP16()) {
    // (f16 operations promoted to f32 elided)

    // Promote v4f16 to v4f32 when that is known to be safe.
    // (v4f16 and v8f16 promotions elided)
  }

  // AArch64 has implementations of a lot of rounding-like FP operations.
  for (MVT Ty : {MVT::f32, MVT::f64}) {
    // (rounding operation actions elided)
  }

  if (Subtarget->hasFullFP16()) {
    // (f16 rounding operation actions elided)
  }

  // (elided)

  // Generate outline atomics library calls only if LSE was not specified for
  // the subtarget.
  if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
    // (LibCall operation actions for the atomic operations elided)
#define LCALLNAMES(A, B, N)                                                    \
  setLibcallName(A##N##_RELAX, #B #N "_relax");                                \
  setLibcallName(A##N##_ACQ, #B #N "_acq");                                    \
  setLibcallName(A##N##_REL, #B #N "_rel");                                    \
  setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
#define LCALLNAME4(A, B)                                                       \
  LCALLNAMES(A, B, 1)                                                          \
  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
#define LCALLNAME5(A, B)                                                       \
  LCALLNAMES(A, B, 1)                                                          \
  LCALLNAMES(A, B, 2)                                                          \
  LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
    LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
#undef LCALLNAMES
#undef LCALLNAME4
#undef LCALLNAME5
  }
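  // With the macros above, the helper names compose as base + size + ordering:
  // e.g. an acquire-release 4-byte atomic add calls "__aarch64_ldadd4_acq_rel".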

  // 128-bit loads and stores can be done without expanding.
  setOperationAction(ISD::LOAD, MVT::i128, Custom);
  setOperationAction(ISD::STORE, MVT::i128, Custom);

  // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
  // custom lowering, as there are no un-paired non-temporal stores and
  // legalization will break up 256 bit inputs.
  // (per-type non-temporal store actions elided)

  // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
  // This requires the Performance Monitors extension.
  if (Subtarget->hasPerfMon())
    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);

  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
      getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
    // Issue __sincos_stret if available.
    setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
    setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
  } else {
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  }

  if (Subtarget->getTargetTriple().isOSMSVCRT()) {
    // MSVCRT doesn't have powi; fall back to pow.
    setLibcallName(RTLIB::POWI_F32, nullptr);
    setLibcallName(RTLIB::POWI_F64, nullptr);
  }

  // Make floating-point constants legal for the large code model, so they don't
  // become loads from the constant pool.
  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  }

  // AArch64 does not have floating-point extending loads, i1 sign-extending
  // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
  for (MVT VT : MVT::fp_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
  }
  for (MVT VT : MVT::integer_valuetypes())
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);

  // (truncating store actions elided)

  // Indexed loads and stores are supported.
  for (unsigned im = (unsigned)ISD::PRE_INC;
       im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
    // (per-mode indexed load/store actions elided)
  }

  // Trap.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);

  // We combine OR nodes for bitfield operations.
  setTargetDAGCombine(ISD::OR);
  // Try to create BICs for vector ANDs.
  setTargetDAGCombine(ISD::AND);

  // Vector add and sub nodes may conceal a high-half opportunity.
  // Also, try to fold ADD into CSINC/CSINV.
  // (setTargetDAGCombine calls elided)

  // TODO: Do the same for FP_TO_*INT_SAT.
  // (elided)

  // Try and combine setcc with csel.
  // (elided)

  if (Subtarget->supportsAddressTopByteIgnored())
    setTargetDAGCombine(ISD::LOAD);

  // (MaxStoresPerMemset/Memcpy/Memmove limits elided)
  // In case of strict alignment, avoid an excessive number of byte wide stores.
  // (elided)

  EnableExtLdPromotion = true;

  // Set required alignment.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  // (elided)

  // Only change the limit for entries in a jump table if specified by
  // the subtarget, but not at the command line.
  unsigned MaxJT = STI.getMaximumJumpTableSize();
  if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
    setMaximumJumpTableSize(MaxJT);

  setHasExtractBitsInsn(true);

  setSchedulingPreference(Sched::Hybrid);

  if (Subtarget->hasNEON()) {
    // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
    // silliness like this:
    // (v1f64 operation actions elided)

    // AArch64 doesn't have direct vector->f32 conversion instructions for
    // elements smaller than i32, so promote the input to i32 first.
    // (elided)

    // Similarly, there is no direct i32 -> f64 vector conversion instruction.
    // (elided)
    // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
    // conversion happens in two steps: v4i32 -> v4f32 -> v4f16.
    // (elided)

    if (Subtarget->hasFullFP16()) {
      // (elided)
    } else {
      // When AArch64 doesn't have full fp16 support, promote the input
      // to i32 first.
      // (elided)
    }

    // (elided)

    // AArch64 doesn't have MUL.2d:
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
    // Custom handling for some quad-vector types to detect MULL.
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);

    // Saturates
    for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
                    MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
      setOperationAction(ISD::SADDSAT, VT, Legal);
      setOperationAction(ISD::UADDSAT, VT, Legal);
      setOperationAction(ISD::SSUBSAT, VT, Legal);
      setOperationAction(ISD::USUBSAT, VT, Legal);
    }

    for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
                   MVT::v4i32}) {
      // (elided)
    }

    // Vector reductions
    for (MVT VT : { MVT::v4f16, MVT::v2f32,
                    MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
      if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
        // (FP reduction actions elided)
      }
    }
    for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
                    MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
      // (integer reduction actions elided)
    }
    // (elided)
    // Likewise, narrowing and extending vector loads/stores aren't handled
    // directly.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
        setOperationAction(ISD::MULHS, VT, Legal);
        setOperationAction(ISD::MULHU, VT, Legal);
      } else {
        setOperationAction(ISD::MULHS, VT, Expand);
        setOperationAction(ISD::MULHU, VT, Expand);
      }
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);

      // (elided)

      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // AArch64 has implementations of a lot of rounding-like FP operations.
    for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
      setOperationAction(ISD::FFLOOR, Ty, Legal);
      setOperationAction(ISD::FNEARBYINT, Ty, Legal);
      setOperationAction(ISD::FCEIL, Ty, Legal);
      setOperationAction(ISD::FRINT, Ty, Legal);
      setOperationAction(ISD::FROUND, Ty, Legal);
      setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
      setOperationAction(ISD::FTRUNC, Ty, Legal);
    }

    if (Subtarget->hasFullFP16()) {
      for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
        setOperationAction(ISD::FFLOOR, Ty, Legal);
        setOperationAction(ISD::FNEARBYINT, Ty, Legal);
        setOperationAction(ISD::FCEIL, Ty, Legal);
        setOperationAction(ISD::FRINT, Ty, Legal);
        setOperationAction(ISD::FROUND, Ty, Legal);
        setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
        setOperationAction(ISD::FTRUNC, Ty, Legal);
      }
    }

    if (Subtarget->hasSVE())
      setOperationAction(ISD::VSCALE, MVT::i32, Custom);

    // (elided)
  }

  if (Subtarget->hasSVE()) {
    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
      // (per-type operation actions elided)
    }

    // Illegal unpacked integer vector types.
    for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
    }

    // Legalize unpacked bitcasts to REINTERPRET_CAST.
    // (BITCAST actions for the unpacked types elided)

    for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
      // (per-type operation actions elided)

      // There are no legal MVT::nxv16f## based types.
      if (VT != MVT::nxv16i1) {
        setOperationAction(ISD::SINT_TO_FP, VT, Custom);
        setOperationAction(ISD::UINT_TO_FP, VT, Custom);
      }
    }

    // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does.
    // (a loop marking these Custom for the fixed-length NEON types elided)

    for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
      for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) {
        // Avoid marking truncating FP stores as legal to prevent the
        // DAGCombiner from creating unsupported truncating stores.
        setTruncStoreAction(VT, InnerVT, Expand);
        // SVE does not have floating-point extending loads.
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // SVE supports truncating stores of 64 and 128-bit vectors.
    // (setTruncStoreAction calls elided)

    // (operation actions for the SVE floating-point types elided)

    for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
      // (elided)
    }

    // (elided)

    // NOTE: Currently this has to happen after computeRegisterProperties rather
    // than the preferred option of combining it with the addRegisterClass call.
    if (Subtarget->useSVEForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useSVEForFixedLengthVectorVT(VT))
          addTypeForFixedLengthSVE(VT);
      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useSVEForFixedLengthVectorVT(VT))
          addTypeForFixedLengthSVE(VT);

      // 64bit results can mean a bigger than NEON input.
      for (auto VT : {MVT::v8i8, MVT::v4i16})
        setOperationAction(ISD::TRUNCATE, VT, Custom);

      // 128bit results imply a bigger than NEON input.
      for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(ISD::TRUNCATE, VT, Custom);
      for (auto VT : {MVT::v8f16, MVT::v4f32})
        setOperationAction(ISD::FP_ROUND, VT, Custom);

      // These operations are not supported on NEON but SVE can do them.
      // (elided)

      // Int operations with no NEON support.
      for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
                      MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
        // (elided)
      }

      // FP operations with no NEON support.
      for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
                      MVT::v1f64, MVT::v2f64}) {
        // (elided)
      }

      // Use SVE for vectors with more than 2 elements.
      for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32}) {
        // (elided)
      }
    }

    // (elided)
  }

  // (elided)
}

void AArch64TargetLowering::addTypeForNEON(MVT VT) {
  assert(VT.isVector() && "VT should be a vector type");

  if (VT.isFloatingPoint()) {
    MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
    setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
    setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
  }

  // Mark vector float intrinsics as expand.
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::FLOG, VT, Expand);
    setOperationAction(ISD::FLOG2, VT, Expand);
    setOperationAction(ISD::FLOG10, VT, Expand);
    setOperationAction(ISD::FEXP, VT, Expand);
    setOperationAction(ISD::FEXP2, VT, Expand);
  }

  // But we do support custom-lowering for FCOPYSIGN.
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
      ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
    setOperationAction(ISD::FCOPYSIGN, VT, Custom);

  // (further Custom operation actions elided)

  for (MVT InnerVT : MVT::all_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);

  // CNT supports only B element sizes, then use UADDLP to widen.
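  // (e.g. a v4i16 CTPOP lowers to a byte-wise CNT followed by a pairwise
  // UADDLP that widens the per-byte counts to the requested element size.)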
  if (VT != MVT::v8i8 && VT != MVT::v16i8)
    setOperationAction(ISD::CTPOP, VT, Custom);

  // (elided)

  if (!VT.isFloatingPoint())
    setOperationAction(ISD::ABS, VT, Legal);

  // [SU][MIN|MAX] are available for all NEON types apart from i64.
  if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
    for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
      setOperationAction(Opcode, VT, Legal);

  // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
  if (VT.isFloatingPoint() &&
      VT.getVectorElementType() != MVT::bf16 &&
      (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
    for (unsigned Opcode :
         {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
      setOperationAction(Opcode, VT, Legal);

  if (Subtarget->isLittleEndian()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, VT, Legal);
      setIndexedStoreAction(im, VT, Legal);
    }
  }
}

void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");

  // By default everything must be expanded.
  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
    setOperationAction(Op, VT, Expand);

  // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

  if (VT.isFloatingPoint()) {
    // (FP-specific operation actions elided)
  }

  // Mark integer truncating stores as having custom lowering.
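  // (e.g. for VT == v4i32, the loop below marks the v4i8 and v4i16
  // truncating stores as Custom.)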
  if (VT.isInteger()) {
    MVT InnerVT = VT.changeVectorElementType(MVT::i8);
    while (InnerVT != VT) {
      setTruncStoreAction(VT, InnerVT, Custom);
      InnerVT = InnerVT.changeVectorElementType(
          MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
    }
  }

  // Lower fixed length vector operations to scalable equivalents.
  // (a long list of setOperationAction(..., VT, Custom) calls elided)
}

void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &AArch64::FPR64RegClass);
  addTypeForNEON(VT);
}

void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &AArch64::FPR128RegClass);
  addTypeForNEON(VT);
}

EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
                                              LLVMContext &C, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  if (VT.isScalableVector())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
                               const APInt &Demanded,
                               TargetLowering::TargetLoweringOpt &TLO,
                               unsigned NewOpc) {
  uint64_t OldImm = Imm, NewImm, Enc;
  uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;

  // Return if the immediate is already all zeros, all ones, a bimm32 or a
  // bimm64.
  if (Imm == 0 || Imm == Mask ||
      AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
    return false;

  unsigned EltSize = Size;
  uint64_t DemandedBits = Demanded.getZExtValue();

  // Clear bits that are not demanded.
  Imm &= DemandedBits;

  while (true) {
    // The goal here is to set the non-demanded bits in a way that minimizes
    // the number of switches between 0 and 1. In order to achieve this goal,
    // we set the non-demanded bits to the value of the preceding demanded bits.
    // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
    // non-demanded bit), we copy bit0 (1) to the least significant 'x',
    // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
    // The final result is 0b11000011.
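    // Mechanically: RotatedImm seeds the first bit of each non-demanded run
    // with a 1 when the preceding demanded bit is 0; the addition below then
    // ripples a carry that clears that whole run, so Ones keeps exactly the
    // runs whose preceding demanded bit is 1.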
    uint64_t NonDemandedBits = ~DemandedBits;
    uint64_t InvertedImm = ~Imm & DemandedBits;
    uint64_t RotatedImm =
        ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
        NonDemandedBits;
    uint64_t Sum = RotatedImm + NonDemandedBits;
    bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
    uint64_t Ones = (Sum + Carry) & NonDemandedBits;
    NewImm = (Imm | Ones) & Mask;

    // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
    // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
    // we halve the element size and continue the search.
    if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
      break;

    // We cannot shrink the element size any further if it is 2-bits.
    if (EltSize == 2)
      return false;

    EltSize /= 2;
    Mask >>= EltSize;
    uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;

    // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
    if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
      return false;

    // Merge the upper and lower halves of Imm and DemandedBits.
    Imm |= Hi;
    DemandedBits |= DemandedBitsHi;
  }

  ++NumOptimizedImms;

  // Replicate the element across the register width.
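  // (e.g. with Size == 32 and a final EltSize of 8, an element pattern of
  // 0xA5 becomes 0xA5A5A5A5.)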
  while (EltSize < Size) {
    NewImm |= NewImm << EltSize;
    EltSize *= 2;
  }

  (void)OldImm;
  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
         "demanded bits should never be altered");
  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");

  // Create the new constant immediate node.
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue New;

  // If the new constant immediate is all-zeros or all-ones, let the target
  // independent DAG combine optimize this node.
  if (NewImm == 0 || NewImm == OrigMask) {
    New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
                          TLO.DAG.getConstant(NewImm, DL, VT));
    // Otherwise, create a machine node so that target independent DAG combine
    // doesn't undo this optimization.
  } else {
    Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
    SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
    New = SDValue(
        TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
  }

  return TLO.CombineTo(Op, New);
}

bool AArch64TargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    TargetLoweringOpt &TLO) const {
  // Delay this optimization to as late as possible.
  if (!TLO.LegalOps)
    return false;

  if (!EnableOptimizeLogicalImm)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  unsigned Size = VT.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "i32 or i64 is expected after legalization.");

  // Exit early if we demand all bits.
  if (DemandedBits.countPopulation() == Size)
    return false;

  unsigned NewOpc;
  switch (Op.getOpcode()) {
  default:
    return false;
  case ISD::AND:
    NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
    break;
  case ISD::OR:
    NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
    break;
  case ISD::XOR:
    NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
    break;
  }
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;
  uint64_t Imm = C->getZExtValue();
  return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
}

/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them in Known.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
    const SDValue Op, KnownBits &Known,
    const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case AArch64ISD::CSEL: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
    Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
    Known = KnownBits::commonBits(Known, Known2);
    break;
  }
  case AArch64ISD::LOADgot:
  case AArch64ISD::ADDlow: {
    if (!Subtarget->isTargetILP32())
      break;
    // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
    Known.Zero = APInt::getHighBitsSet(64, 32);
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
    Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
    switch (IntID) {
    default: return;
    case Intrinsic::aarch64_ldaxr:
    case Intrinsic::aarch64_ldxr: {
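      // The exclusive load zero-extends from the memory type, so every bit
      // above the loaded width is known to be zero.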
      unsigned BitWidth = Known.getBitWidth();
      EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
      unsigned MemBits = VT.getScalarSizeInBits();
      Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_neon_umaxv:
    case Intrinsic::aarch64_neon_uminv: {
      // Figure out the datatype of the vector operand. The UMINV instruction
      // will zero extend the result, so we can mark as known zero all the
      // bits larger than the element datatype. 32-bit or larger doesn't need
      // this as those are legal types and will be handled by isel directly.
      MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
      unsigned BitWidth = Known.getBitWidth();
      if (VT == MVT::v8i8 || VT == MVT::v16i8) {
        assert(BitWidth >= 8 && "Unexpected width!");
        APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
        Known.Zero |= Mask;
      } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
        assert(BitWidth >= 16 && "Unexpected width!");
        APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
        Known.Zero |= Mask;
      }
      break;
    } break;
    }
  }
  }
}

MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                                  EVT) const {
  return MVT::i64;
}

bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (Subtarget->requiresStrictAlign())
    return false;

  if (Fast) {
    // Some CPUs are fine with unaligned stores except for 128-bit ones.
    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
            // See comments in performSTORECombine() for more details about
            // these conditions.

            // Code that uses clang vector extensions can mark that it
            // wants unaligned accesses to be treated as fast by
            // underspecifying alignment to be 1 or 2.
            Alignment <= 2 ||

            // Disregard v2i64. Memcpy lowering produces those and splitting
            // them regresses performance on micro-benchmarks and olden/bh.
            VT == MVT::v2i64;
  }
  return true;
}

// Same as above but handling LLTs instead.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
    LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (Subtarget->requiresStrictAlign())
    return false;

  if (Fast) {
    // Some CPUs are fine with unaligned stores except for 128-bit ones.
    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
            Ty.getSizeInBytes() != 16 ||
            // See comments in performSTORECombine() for more details about
            // these conditions.

            // Code that uses clang vector extensions can mark that it
            // wants unaligned accesses to be treated as fast by
            // underspecifying alignment to be 1 or 2.
            Alignment <= 2 ||

            // Disregard v2i64. Memcpy lowering produces those and splitting
            // them regresses performance on micro-benchmarks and olden/bh.
            Ty == LLT::fixed_vector(2, 64);
  }
  return true;
}

FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                      const TargetLibraryInfo *libInfo) const {
  return AArch64::createFastISel(funcInfo, libInfo);
}

const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
#define MAKE_CASE(V)                                                           \
  case V:                                                                      \
    return #V;
  switch ((AArch64ISD::NodeType)Opcode) {
  case AArch64ISD::FIRST_NUMBER:
    break;
    // (MAKE_CASE entries for the AArch64ISD nodes elided)
  }
#undef MAKE_CASE
  return nullptr;
}

MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  // We materialise the F128CSEL pseudo-instruction as some control flow and a
  // phi node:

  // OrigBB:
  //     [... previous instrs leading to comparison ...]
  //     b.ne TrueBB
  //     b EndBB
  // TrueBB:
  //     ; Fallthrough
  // EndBB:
  //     Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]

  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator It = ++MBB->getIterator();

  Register DestReg = MI.getOperand(0).getReg();
  Register IfTrueReg = MI.getOperand(1).getReg();
  Register IfFalseReg = MI.getOperand(2).getReg();
  unsigned CondCode = MI.getOperand(3).getImm();
  bool NZCVKilled = MI.getOperand(4).isKill();

  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, TrueBB);
  MF->insert(It, EndBB);

  // Transfer the rest of the current basic block to EndBB.
  EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
                MBB->end());
  EndBB->transferSuccessorsAndUpdatePHIs(MBB);

  BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
  BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
  MBB->addSuccessor(TrueBB);
  MBB->addSuccessor(EndBB);

  // TrueBB falls through to the end.
  TrueBB->addSuccessor(EndBB);

  if (!NZCVKilled) {
    TrueBB->addLiveIn(AArch64::NZCV);
    EndBB->addLiveIn(AArch64::NZCV);
  }

  BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
      .addReg(IfTrueReg)
      .addMBB(TrueBB)
      .addReg(IfFalseReg)
      .addMBB(MBB);

  MI.eraseFromParent();
  return EndBB;
}

MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  assert(!isAsynchronousEHPersonality(classifyEHPersonality(
             BB->getParent()->getFunction().getPersonalityFn())) &&
         "SEH does not use catchret!");
  return BB;
}

MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
#ifndef NDEBUG
    MI.dump();
#endif
    llvm_unreachable("Unexpected instruction for custom inserter!");

  case AArch64::F128CSEL:
    return EmitF128CSEL(MI, BB);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
  case TargetOpcode::STATEPOINT:
    return emitPatchPoint(MI, BB);

  case AArch64::CATCHRET:
    return EmitLoweredCatchRet(MI, BB);
  }
}

//===----------------------------------------------------------------------===//
// AArch64 Lowering private implementation.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

// Forward declarations of SVE fixed length lowering helpers.
// (declarations elided)

/// isZerosVector - Check whether SDNode N is a zero-filled vector.
static bool isZerosVector(const SDNode *N) {
  // Look through a bit convert.
  while (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0).getNode();

  if (ISD::isConstantSplatVectorAllZeros(N))
    return true;

  if (N->getOpcode() != AArch64ISD::DUP)
    return false;

  auto Opnd0 = N->getOperand(0);
  auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
  auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
  return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
}

/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
/// CC.
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:
    return AArch64CC::NE;
  case ISD::SETEQ:
    return AArch64CC::EQ;
  case ISD::SETGT:
    return AArch64CC::GT;
  case ISD::SETGE:
    return AArch64CC::GE;
  case ISD::SETLT:
    return AArch64CC::LT;
  case ISD::SETLE:
    return AArch64CC::LE;
  case ISD::SETUGT:
    return AArch64CC::HI;
  case ISD::SETUGE:
    return AArch64CC::HS;
  case ISD::SETULT:
    return AArch64CC::LO;
  case ISD::SETULE:
    return AArch64CC::LS;
  }
}

/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
static void changeFPCCToAArch64CC(ISD::CondCode CC,
                                  AArch64CC::CondCode &CondCode,
                                  AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (CC) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ:
    CondCode = AArch64CC::EQ;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    CondCode = AArch64CC::GT;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    CondCode = AArch64CC::GE;
    break;
  case ISD::SETOLT:
    CondCode = AArch64CC::MI;
    break;
  case ISD::SETOLE:
    CondCode = AArch64CC::LS;
    break;
  case ISD::SETONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case ISD::SETO:
    CondCode = AArch64CC::VC;
    break;
  case ISD::SETUO:
    CondCode = AArch64CC::VS;
    break;
  case ISD::SETUEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case ISD::SETUGT:
    CondCode = AArch64CC::HI;
    break;
  case ISD::SETUGE:
    CondCode = AArch64CC::PL;
    break;
  case ISD::SETLT:
  case ISD::SETULT:
    CondCode = AArch64CC::LT;
    break;
  case ISD::SETLE:
  case ISD::SETULE:
    CondCode = AArch64CC::LE;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

/// Convert a DAG fp condition code to an AArch64 CC.
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
/// should be AND'ed instead of OR'ed.
static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
                                     AArch64CC::CondCode &CondCode,
                                     AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (CC) {
  default:
    changeFPCCToAArch64CC(CC, CondCode, CondCode2);
    assert(CondCode2 == AArch64CC::AL);
    break;
  case ISD::SETONE:
    // (a one b)
    // == ((a olt b) || (a ogt b))
    // == ((a ord b) && (a une b))
    CondCode = AArch64CC::VC;
    CondCode2 = AArch64CC::NE;
    break;
  case ISD::SETUEQ:
    // (a ueq b)
    // == ((a uno b) || (a oeq b))
    // == ((a ule b) && (a uge b))
    CondCode = AArch64CC::PL;
    CondCode2 = AArch64CC::LE;
    break;
  }
}

/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
/// CC usable with the vector instructions. Fewer operations are available
/// without a real NZCV register, so we have to use less efficient combinations
/// to get the same effect.
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
                                        AArch64CC::CondCode &CondCode,
                                        AArch64CC::CondCode &CondCode2,
                                        bool &Invert) {
  Invert = false;
  switch (CC) {
  default:
    // Mostly the scalar mappings work fine.
    changeFPCCToAArch64CC(CC, CondCode, CondCode2);
    break;
  case ISD::SETUO:
    Invert = true;
    LLVM_FALLTHROUGH;
  case ISD::SETO:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GE;
    break;
  case ISD::SETUEQ:
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    // All of the compare-mask comparisons are ordered, but we can switch
    // between the two by a double inversion. E.g. ULE == !OGT.
    Invert = true;
    changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
                          CondCode, CondCode2);
    break;
  }
}

static bool isLegalArithImmed(uint64_t C) {
  // Matches AArch64DAGToDAGISel::SelectArithImmed().
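  // A legal immediate is a 12-bit unsigned value, optionally shifted left by
  // 12: e.g. 0xFFF and 0x123000 are legal, while 0x1001 is not.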
  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
  LLVM_DEBUG(dbgs() << "Is imm " << C
                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
  return IsLegal;
}

// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
// the grounds that "op1 - (-op2) == op1 + op2"? Not always, because the C and V
// flags can be set differently by this operation. It comes down to whether
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
// everything is fine. If not then the optimization is wrong. Thus general
// comparisons are only valid if op2 != 0.
//
// So, finally, the only LLVM-native comparisons that don't mention C and V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2.
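// For example, (setcc eq x, (sub 0, y)) can be emitted as ADDS x, y followed
// by a test of the Z flag, since EQ/NE only read Z.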
static bool isCMN(SDValue Op, ISD::CondCode CC) {
  return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
         (CC == ISD::SETEQ || CC == ISD::SETNE);
}

static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS,
                                      const SDLoc &dl, SelectionDAG &DAG,
                                      SDValue Chain, bool IsSignaling) {
  EVT VT = LHS.getValueType();
  assert(VT != MVT::f128);
  assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
  unsigned Opcode =
      IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
  return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
}

static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG) {
  EVT VT = LHS.getValueType();
  const bool FullFP16 =
      static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();

  if (VT.isFloatingPoint()) {
    assert(VT != MVT::f128);
    if (VT == MVT::f16 && !FullFP16) {
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
      RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
      VT = MVT::f32;
    }
    return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
  }

  // The CMP instruction is just an alias for SUBS, and representing it as
  // SUBS means that it's possible to get CSE with subtract operations.
  // A later phase can perform the optimization of setting the destination
  // register to WZR/XZR if it ends up being unused.
  unsigned Opcode = AArch64ISD::SUBS;

  if (isCMN(RHS, CC)) {
    // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
    Opcode = AArch64ISD::ADDS;
    RHS = RHS.getOperand(1);
  } else if (isCMN(LHS, CC)) {
    // As we are looking for EQ/NE compares, the operands can be commuted; can
    // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
    Opcode = AArch64ISD::ADDS;
    LHS = LHS.getOperand(1);
  } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
    if (LHS.getOpcode() == ISD::AND) {
      // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
      // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
      // of the signed comparisons.
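      // (i.e. "cmp (and x, y), #0" becomes "tst x, y", TST being the
      // flag-setting form of AND with the result discarded.)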
      const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
                                           DAG.getVTList(VT, MVT_CC),
                                           LHS.getOperand(0),
                                           LHS.getOperand(1));
      // Replace all users of (and X, Y) with newly generated (ands X, Y).
      DAG.ReplaceAllUsesWith(LHS, ANDSNode);
      return ANDSNode.getValue(1);
    } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
      // Use the result of ANDS.
      return LHS.getValue(1);
    }
  }

  return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
      .getValue(1);
}

/// \defgroup AArch64CCMP CMP;CCMP matching
///
/// These functions deal with the formation of CMP;CCMP;... sequences.
/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
/// a comparison. They set the NZCV flags to a predefined value if their
/// predicate is false. This allows expressing arbitrary conjunctions, for
/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
/// expressed as:
///   cmp A
///   ccmp B, inv(CB), CA
///   check for CB flags
///
/// This naturally lets us implement chains of AND operations with SETCC
/// operands. And we can even implement some other situations by transforming
/// them:
///   - We can implement (NEG SETCC), i.e. negating a single comparison, by
///     negating the flags used in a CCMP/FCCMP operation.
///   - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
///     by negating the flags we test for afterwards, i.e.
///     NEG (CMP CCMP CCCMP ...) can be implemented.
///   - Note that we can only ever negate all previously processed results.
///     What we cannot implement by flipping the flags to test is a negation
///     of two sub-trees (because the negation affects all sub-trees emitted so
///     far, so the 2nd sub-tree we emit would also affect the first).
/// With those tools we can implement some OR operations:
///   - (OR (SETCC A) (SETCC B)) can be implemented via:
///     NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
///   - After transforming OR to NEG/AND combinations we may be able to use NEG
///     elimination rules from earlier to implement the whole thing as a
///     CCMP/FCCMP chain.
///
/// As a complete example:
///     or (or (setCA (cmp A)) (setCB (cmp B)))
///        (and (setCC (cmp C)) (setCD (cmp D)))
/// can be reassociated to:
///     or (and (setCC (cmp C)) (setCD (cmp D)))
///        (or (setCA (cmp A)) (setCB (cmp B)))
/// which can be transformed to:
///     not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
///              (and (not (setCA (cmp A))) (not (setCB (cmp B)))))
/// which can be implemented as:
///     cmp C
///     ccmp D, inv(CD), CC
///     ccmp A, CA, inv(CD)
///     ccmp B, CB, inv(CA)
///     check for CB flags
///
/// A counterexample is "or (and A B) (and C D)" which translates to
/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
/// can only implement 1 of the inner (not) operations, but not both!
/// @{

/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
                                         ISD::CondCode CC, SDValue CCOp,
                                         AArch64CC::CondCode Predicate,
                                         AArch64CC::CondCode OutCC,
                                         const SDLoc &DL, SelectionDAG &DAG) {
  unsigned Opcode = 0;
  const bool FullFP16 =
      static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();

  if (LHS.getValueType().isFloatingPoint()) {
    assert(LHS.getValueType() != MVT::f128);
    if (LHS.getValueType() == MVT::f16 && !FullFP16) {
      LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
      RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
    }
    Opcode = AArch64ISD::FCCMP;
  } else if (RHS.getOpcode() == ISD::SUB) {
    SDValue SubOp0 = RHS.getOperand(0);
    if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
      // See emitComparison() on why we can only do this for SETEQ and SETNE.
      Opcode = AArch64ISD::CCMN;
      RHS = RHS.getOperand(1);
    }
  }
  if (Opcode == 0)
    Opcode = AArch64ISD::CCMP;

  SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
  SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
  return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
}

/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
/// expressed as a conjunction. See \ref AArch64CCMP.
/// \param CanNegate    Set to true if we can negate the whole sub-tree just by
///                     changing the conditions on the SETCC tests.
///                     (this means we can call emitConjunctionRec() with
///                      Negate==true on this sub-tree)
/// \param MustBeFirst  Set to true if this subtree needs to be negated and we
///                     cannot do the negation naturally. We are required to
///                     emit the subtree first in this case.
/// \param WillNegate   Is true if we are called when the result of this
///                     subexpression must be negated. This happens when the
///                     outer expression is an OR. We can use this fact to know
///                     that we have a double negation (or (or ...) ...) that
///                     can be implemented for free.
static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
                               bool &MustBeFirst, bool WillNegate,
                               unsigned Depth = 0) {
  if (!Val.hasOneUse())
    return false;
  unsigned Opcode = Val->getOpcode();
  if (Opcode == ISD::SETCC) {
    if (Val->getOperand(0).getValueType() == MVT::f128)
      return false;
    CanNegate = true;
    MustBeFirst = false;
    return true;
  }
  // Protect against exponential runtime and stack overflow.
  if (Depth > 6)
    return false;
  if (Opcode == ISD::AND || Opcode == ISD::OR) {
    bool IsOR = Opcode == ISD::OR;
    SDValue O0 = Val->getOperand(0);
    SDValue O1 = Val->getOperand(1);
    bool CanNegateL;
    bool MustBeFirstL;
    if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth + 1))
      return false;
    bool CanNegateR;
    bool MustBeFirstR;
    if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth + 1))
      return false;

    if (MustBeFirstL && MustBeFirstR)
      return false;

    if (IsOR) {
      // For an OR expression we need to be able to naturally negate at least
      // one side or we cannot do the transformation at all.
      if (!CanNegateL && !CanNegateR)
        return false;
      // If the result of the OR will be negated and we can naturally negate
      // the leaves, then this sub-tree as a whole negates naturally.
      CanNegate = WillNegate && CanNegateL && CanNegateR;
      // If we cannot naturally negate the whole sub-tree, then this must be
      // emitted first.
      MustBeFirst = !CanNegate;
    } else {
      assert(Opcode == ISD::AND && "Must be OR or AND");
      // We cannot naturally negate an AND operation.
      CanNegate = false;
      MustBeFirst = MustBeFirstL || MustBeFirstR;
    }
    return true;
  }
  return false;
}

/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
/// Tries to transform the given i1 producing node @p Val to a series of
/// compare and conditional compare operations. @returns an NZCV flags
/// producing node and sets @p OutCC to the flags that should be tested, or
/// returns SDValue() if the transformation was not possible.
/// \p Negate is true if we want this sub-tree being negated just by changing
/// SETCC conditions.
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
                                  AArch64CC::CondCode &OutCC, bool Negate,
                                  SDValue CCOp,
                                  AArch64CC::CondCode Predicate) {
  // We're at a tree leaf, produce a conditional comparison operation.
  unsigned Opcode = Val->getOpcode();
  if (Opcode == ISD::SETCC) {
    SDValue LHS = Val->getOperand(0);
    SDValue RHS = Val->getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
    bool isInteger = LHS.getValueType().isInteger();
    if (Negate)
      CC = getSetCCInverse(CC, LHS.getValueType());
    SDLoc DL(Val);
    // Determine OutCC and handle FP special case.
    if (isInteger) {
      OutCC = changeIntCCToAArch64CC(CC);
    } else {
      assert(LHS.getValueType().isFloatingPoint());
      AArch64CC::CondCode ExtraCC;
      changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
      // Some floating point conditions can't be tested with a single condition
      // code. Construct an additional comparison in this case.
      if (ExtraCC != AArch64CC::AL) {
        SDValue ExtraCmp;
        if (!CCOp.getNode())
          ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
        else
          ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
                                               ExtraCC, DL, DAG);
        CCOp = ExtraCmp;
        Predicate = ExtraCC;
      }
    }

    // Produce a normal comparison if we are first in the chain.
    if (!CCOp)
      return emitComparison(LHS, RHS, CC, DL, DAG);
    // Otherwise produce a ccmp.
    return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
                                     DAG);
  }
  assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");

  bool IsOR = Opcode == ISD::OR;

  SDValue LHS = Val->getOperand(0);
  bool CanNegateL;
  bool MustBeFirstL;
  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
  assert(ValidL && "Valid conjunction/disjunction tree");
  (void)ValidL;

  SDValue RHS = Val->getOperand(1);
  bool CanNegateR;
  bool MustBeFirstR;
  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
  assert(ValidR && "Valid conjunction/disjunction tree");
  (void)ValidR;

  // Swap sub-tree that must come first to the right side.
  if (MustBeFirstL) {
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
    std::swap(LHS, RHS);
    std::swap(CanNegateL, CanNegateR);
    std::swap(MustBeFirstL, MustBeFirstR);
  }

  bool NegateR;
  bool NegateAfterR;
  bool NegateL;
  bool NegateAfterAll;
  if (Opcode == ISD::OR) {
    // Swap the sub-tree that we can negate naturally to the left.
    if (!CanNegateL) {
      assert(CanNegateR && "at least one side must be negatable");
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      assert(!Negate);
      std::swap(LHS, RHS);
      NegateR = false;
      NegateAfterR = true;
    } else {
      // Negate the left sub-tree if possible, otherwise negate the result.
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    }
    NegateL = true;
    NegateAfterAll = !Negate;
  } else {
    assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");

    NegateL = false;
    NegateR = false;
    NegateAfterR = false;
    NegateAfterAll = false;
  }

  // Emit sub-trees.
  AArch64CC::CondCode RHSCC;
  SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
  if (NegateAfterR)
    RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
  SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
  if (NegateAfterAll)
    OutCC = AArch64CC::getInvertedCondCode(OutCC);
  return CmpL;
}

/// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
/// In some cases this is even possible with OR operations in the expression.
/// See \ref AArch64CCMP.
/// \see emitConjunctionRec().
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
                               AArch64CC::CondCode &OutCC) {
  bool DummyCanNegate;
  bool DummyMustBeFirst;
  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
    return SDValue();

  return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
}

/// @}

/// Returns how profitable it is to fold a comparison's operand's shift and/or
/// extension operations.
static unsigned getCmpOperandFoldingProfit(SDValue Op) {
  auto isSupportedExtend = [&](SDValue V) {
    if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
      return true;

    if (V.getOpcode() == ISD::AND)
      if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
        uint64_t Mask = MaskCst->getZExtValue();
        return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
      }

    return false;
  };

  if (!Op.hasOneUse())
    return 0;

  if (isSupportedExtend(Op))
    return 1;

  unsigned Opc = Op.getOpcode();
  if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
    if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      uint64_t Shift = ShiftCst->getZExtValue();
      if (isSupportedExtend(Op.getOperand(0)))
        return (Shift <= 4) ? 2 : 1;
      EVT VT = Op.getValueType();
      if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
        return 1;
    }

  return 0;
}

static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                             SDValue &AArch64cc, SelectionDAG &DAG,
                             const SDLoc &dl) {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    EVT VT = RHS.getValueType();
    uint64_t C = RHSC->getZExtValue();
    if (!isLegalArithImmed(C)) {
      // Constant does not fit, try adjusting it by one?
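      // (e.g. an unsigned "x < 0xFFF001" can become "x <= 0xFFF000", whose
      // immediate is an encodable shifted 12-bit value.)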
2871  switch (CC) {
2872  default:
2873  break;
2874  case ISD::SETLT:
2875  case ISD::SETGE:
2876  if ((VT == MVT::i32 && C != 0x80000000 &&
2877  isLegalArithImmed((uint32_t)(C - 1))) ||
2878  (VT == MVT::i64 && C != 0x80000000ULL &&
2879  isLegalArithImmed(C - 1ULL))) {
2880  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2881  C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2882  RHS = DAG.getConstant(C, dl, VT);
2883  }
2884  break;
2885  case ISD::SETULT:
2886  case ISD::SETUGE:
2887  if ((VT == MVT::i32 && C != 0 &&
2888  isLegalArithImmed((uint32_t)(C - 1))) ||
2889  (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2890  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2891  C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2892  RHS = DAG.getConstant(C, dl, VT);
2893  }
2894  break;
2895  case ISD::SETLE:
2896  case ISD::SETGT:
2897  if ((VT == MVT::i32 && C != INT32_MAX &&
2898  isLegalArithImmed((uint32_t)(C + 1))) ||
2899  (VT == MVT::i64 && C != INT64_MAX &&
2900  isLegalArithImmed(C + 1ULL))) {
2901  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2902  C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2903  RHS = DAG.getConstant(C, dl, VT);
2904  }
2905  break;
2906  case ISD::SETULE:
2907  case ISD::SETUGT:
2908  if ((VT == MVT::i32 && C != UINT32_MAX &&
2909  isLegalArithImmed((uint32_t)(C + 1))) ||
2910  (VT == MVT::i64 && C != UINT64_MAX &&
2911  isLegalArithImmed(C + 1ULL))) {
2912  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2913  C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2914  RHS = DAG.getConstant(C, dl, VT);
2915  }
2916  break;
2917  }
2918  }
2919  }
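// For example, the comparison "w0 < 4097" has an immediate that cannot be
// encoded, but after the SETLT -> SETLE adjustment above the constant 4096
// (0x1000) fits the shifted 12-bit immediate form, so a single compare
// suffices, e.g.:
//   cmp w0, #4096   ; condition LE instead of LT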
2920 
2921  // Comparisons are canonicalized so that the RHS operand is simpler than the
2922  // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2923  // can fold some shift+extend operations on the RHS operand, so swap the
2924  // operands if that can be done.
2925  //
2926  // For example:
2927  // lsl w13, w11, #1
2928  // cmp w13, w12
2929  // can be turned into:
2930  // cmp w12, w11, lsl #1
2931  if (!isa<ConstantSDNode>(RHS) ||
2932  !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2933  SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2934 
2934 
2935  if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2936  std::swap(LHS, RHS);
2937  CC = ISD::getSetCCSwappedOperands(CC);
2938  }
2939  }
2940 
2941  SDValue Cmp;
2942  AArch64CC::CondCode AArch64CC;
2943  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2944  const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2945 
2946  // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2947  // For the i8 operand, the largest immediate is 255, so this can be easily
2948  // encoded in the compare instruction. For the i16 operand, however, the
2949  // largest immediate cannot be encoded in the compare.
2950  // Therefore, use a sign extending load and cmn to avoid materializing the
2951  // -1 constant. For example,
2952  // movz w1, #65535
2953  // ldrh w0, [x0, #0]
2954  // cmp w0, w1
2955  // >
2956  // ldrsh w0, [x0, #0]
2957  // cmn w0, #1
2958  // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2959  // if and only if (sext LHS) == (sext RHS). The checks are in place to
2960  // ensure both the LHS and RHS are truly zero extended and to make sure the
2961  // transformation is profitable.
2962  if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2963  cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2964  cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2965  LHS.getNode()->hasNUsesOfValue(1, 0)) {
2966  int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2967  if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2968  SDValue SExt =
2969  DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2970  DAG.getValueType(MVT::i16));
2971  Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2972  RHS.getValueType()),
2973  CC, dl, DAG);
2974  AArch64CC = changeIntCCToAArch64CC(CC);
2975  }
2976  }
2977 
2978  if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2979  if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2980  if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2981  AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2982  }
2983  }
2984  }
2985 
2986  if (!Cmp) {
2987  Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
2988  AArch64CC = changeIntCCToAArch64CC(CC);
2989  }
2990  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
2991  return Cmp;
2992 }
2993 
2994 static std::pair<SDValue, SDValue>
2995 getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
2996  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
2997  "Unsupported value type");
2998  SDValue Value, Overflow;
2999  SDLoc DL(Op);
3000  SDValue LHS = Op.getOperand(0);
3001  SDValue RHS = Op.getOperand(1);
3002  unsigned Opc = 0;
3003  switch (Op.getOpcode()) {
3004  default:
3005  llvm_unreachable("Unknown overflow instruction!");
3006  case ISD::SADDO:
3007  Opc = AArch64ISD::ADDS;
3008  CC = AArch64CC::VS;
3009  break;
3010  case ISD::UADDO:
3011  Opc = AArch64ISD::ADDS;
3012  CC = AArch64CC::HS;
3013  break;
3014  case ISD::SSUBO:
3015  Opc = AArch64ISD::SUBS;
3016  CC = AArch64CC::VS;
3017  break;
3018  case ISD::USUBO:
3019  Opc = AArch64ISD::SUBS;
3020  CC = AArch64CC::LO;
3021  break;
3022  // Multiply needs a little bit of extra work.
3023  case ISD::SMULO:
3024  case ISD::UMULO: {
3025  CC = AArch64CC::NE;
3026  bool IsSigned = Op.getOpcode() == ISD::SMULO;
3027  if (Op.getValueType() == MVT::i32) {
3028  // Extend to 64-bits, then perform a 64-bit multiply.
3029  unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3030  LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
3031  RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
3032  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3033  Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3034 
3035  // Check that the result fits into a 32-bit integer.
3036  SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
3037  if (IsSigned) {
3038  // cmp xreg, wreg, sxtw
3039  SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
3040  Overflow =
3041  DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
3042  } else {
3043  // tst xreg, #0xffffffff00000000
3044  SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
3045  Overflow =
3046  DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
3047  }
3048  break;
3049  }
3050  assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
3051  // For the 64-bit multiply:
3052  Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3053  if (IsSigned) {
3054  SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
3055  SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
3056  DAG.getConstant(63, DL, MVT::i64));
3057  // It is important that LowerBits is last, otherwise the arithmetic
3058  // shift will not be folded into the compare (SUBS).
3059  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3060  Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
3061  .getValue(1);
3062  } else {
3063  SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
3064  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3065  Overflow =
3066  DAG.getNode(AArch64ISD::SUBS, DL, VTs,
3067  DAG.getConstant(0, DL, MVT::i64),
3068  UpperBits).getValue(1);
3069  }
3070  break;
3071  }
3072  } // switch (...)
3073 
3074  if (Opc) {
3075  SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
3076 
3077  // Emit the AArch64 operation with overflow check.
3078  Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
3079  Overflow = Value.getValue(1);
3080  }
3081  return std::make_pair(Value, Overflow);
3082 }
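// For example, a 32-bit llvm.smul.with.overflow may lower to something like:
//   smull x8, w0, w1      ; 64-bit product of the sign-extended operands
//   cmp   x8, w8, sxtw    ; overflow iff the product != sext of its low half
//   cset  w1, ne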
3083 
3084 SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
3085  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
3086  return LowerToScalableOp(Op, DAG);
3087 
3088  SDValue Sel = Op.getOperand(0);
3089  SDValue Other = Op.getOperand(1);
3090  SDLoc dl(Sel);
3091 
3092  // If the operand is an overflow checking operation, invert the condition
3093  // code and kill the Not operation. I.e., transform:
3094  // (xor (overflow_op_bool, 1))
3095  // -->
3096  // (csel 1, 0, invert(cc), overflow_op_bool)
3097  // ... which later gets transformed to just a cset instruction with an
3098  // inverted condition code, rather than a cset + eor sequence.
3099  if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
3100  // Only lower legal XALUO ops.
3101  if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
3102  return SDValue();
3103 
3104  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3105  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3106  AArch64CC::CondCode CC;
3107  SDValue Value, Overflow;
3108  std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
3109  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3110  return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
3111  CCVal, Overflow);
3112  }
3113  // If neither operand is a SELECT_CC, give up.
3114  if (Sel.getOpcode() != ISD::SELECT_CC)
3115  std::swap(Sel, Other);
3116  if (Sel.getOpcode() != ISD::SELECT_CC)
3117  return Op;
3118 
3119  // The folding we want to perform is:
3120  // (xor x, (select_cc a, b, cc, 0, -1) )
3121  // -->
3122  // (csel x, (xor x, -1), cc ...)
3123  //
3124  // The latter will get matched to a CSINV instruction.
3125 
3126  ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
3127  SDValue LHS = Sel.getOperand(0);
3128  SDValue RHS = Sel.getOperand(1);
3129  SDValue TVal = Sel.getOperand(2);
3130  SDValue FVal = Sel.getOperand(3);
3131 
3132  // FIXME: This could be generalized to non-integer comparisons.
3133  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
3134  return Op;
3135 
3136  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
3137  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
3138 
3139  // The values aren't constants, this isn't the pattern we're looking for.
3140  if (!CFVal || !CTVal)
3141  return Op;
3142 
3143  // We can commute the SELECT_CC by inverting the condition. This
3144  // might be needed to make this fit into a CSINV pattern.
3145  if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
3146  std::swap(TVal, FVal);
3147  std::swap(CTVal, CFVal);
3148  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
3149  }
3150 
3151  // If the constants line up, perform the transform!
3152  if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
3153  SDValue CCVal;
3154  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
3155 
3156  FVal = Other;
3157  TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
3158  DAG.getConstant(-1ULL, dl, Other.getValueType()));
3159 
3160  return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
3161  CCVal, Cmp);
3162  }
3163 
3164  return Op;
3165 }
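// For example, (xor x, (select_cc a, b, lt, 0, -1)) can become a compare plus
// a CSINV, which selects x when the condition holds and ~x otherwise:
//   cmp   a, b
//   csinv res, x, x, lt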
3166 
3167 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
3168  EVT VT = Op.getValueType();
3169 
3170  // Let legalize expand this if it isn't a legal type yet.
3171  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3172  return SDValue();
3173 
3174  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3175 
3176  unsigned Opc;
3177  bool ExtraOp = false;
3178  switch (Op.getOpcode()) {
3179  default:
3180  llvm_unreachable("Invalid code");
3181  case ISD::ADDC:
3182  Opc = AArch64ISD::ADDS;
3183  break;
3184  case ISD::SUBC:
3185  Opc = AArch64ISD::SUBS;
3186  break;
3187  case ISD::ADDE:
3188  Opc = AArch64ISD::ADCS;
3189  ExtraOp = true;
3190  break;
3191  case ISD::SUBE:
3192  Opc = AArch64ISD::SBCS;
3193  ExtraOp = true;
3194  break;
3195  }
3196 
3197  if (!ExtraOp)
3198  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
3199  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
3200  Op.getOperand(2));
3201 }
3202 
3203 static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
3204  // Let legalize expand this if it isn't a legal type yet.
3205  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3206  return SDValue();
3207 
3208  SDLoc dl(Op);
3209  AArch64CC::CondCode CC;
3210  // The actual operation that sets the overflow or carry flag.
3211  SDValue Value, Overflow;
3212  std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
3213 
3214  // We use 0 and 1 as false and true values.
3215  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3216  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3217 
3218  // We use an inverted condition, because the conditional select is inverted
3219  // too. This will allow it to be selected to a single instruction:
3220  // CSINC Wd, WZR, WZR, invert(cond).
3221  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3222  Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
3223  CCVal, Overflow);
3224 
3225  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3226  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3227 }
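// For example, the boolean overflow result of an i32 llvm.uadd.with.overflow
// may be materialized as:
//   adds w0, w0, w1
//   cset w1, hs          ; a CSINC from WZR with the inverted condition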
3228 
3229 // Prefetch operands are:
3230 // 1: Address to prefetch
3231 // 2: bool isWrite
3232 // 3: int locality (0 = no locality ... 3 = extreme locality)
3233 // 4: bool isDataCache
3234 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
3235  SDLoc DL(Op);
3236  unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3237  unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
3238  unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3239 
3240  bool IsStream = !Locality;
3241  // When the locality number is set
3242  if (Locality) {
3243  // The front-end should have filtered out the out-of-range values
3244  assert(Locality <= 3 && "Prefetch locality out-of-range");
3245  // The locality degree is the inverse of the target cache level: the
3246  // highest locality (3) corresponds to L1, and the prefetch operand
3247  // encoding starts at 0 for level 1, so flip the number around.
3248  Locality = 3 - Locality;
3249  }
3250 
3251  // Build the mask value encoding the expected behavior.
3252  unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
3253  (!IsData << 3) | // IsDataCache bit
3254  (Locality << 1) | // Cache level bits
3255  (unsigned)IsStream; // Stream bit
3256  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
3257  DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
3258 }
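// For example, __builtin_prefetch(p, /*rw=*/0, /*locality=*/3) is a data load
// prefetch that should stay in the fastest cache; it encodes as PrfOp 0b00000
// and might be emitted as:
//   prfm pldl1keep, [x0]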
3259 
3260 SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
3261  SelectionDAG &DAG) const {
3262  EVT VT = Op.getValueType();
3263  if (VT.isScalableVector())
3264  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
3265 
3266  if (useSVEForFixedLengthVectorVT(VT))
3267  return LowerFixedLengthFPExtendToSVE(Op, DAG);
3268 
3269  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
3270  return SDValue();
3271 }
3272 
3273 SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
3274  SelectionDAG &DAG) const {
3275  if (Op.getValueType().isScalableVector())
3276  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
3277 
3278  bool IsStrict = Op->isStrictFPOpcode();
3279  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3280  EVT SrcVT = SrcVal.getValueType();
3281 
3282  if (useSVEForFixedLengthVectorVT(SrcVT))
3283  return LowerFixedLengthFPRoundToSVE(Op, DAG);
3284 
3285  if (SrcVT != MVT::f128) {
3286  // Expand cases where the input is a vector bigger than NEON.
3287  if (useSVEForFixedLengthVectorVT(SrcVT))
3288  return SDValue();
3289 
3290  // It's legal except when f128 is involved
3291  return Op;
3292  }
3293 
3294  return SDValue();
3295 }
3296 
3297 SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
3298  SelectionDAG &DAG) const {
3299  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3300  // Any additional optimization in this function should be recorded
3301  // in the cost tables.
3302  EVT InVT = Op.getOperand(0).getValueType();
3303  EVT VT = Op.getValueType();
3304 
3305  if (VT.isScalableVector()) {
3306  unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
3307  ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
3308  : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
3309  return LowerToPredicatedOp(Op, DAG, Opcode);
3310  }
3311 
3312  if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3313  return LowerFixedLengthFPToIntToSVE(Op, DAG);
3314 
3315  unsigned NumElts = InVT.getVectorNumElements();
3316 
3317  // f16 conversions are promoted to f32 when full fp16 is not supported.
3318  if (InVT.getVectorElementType() == MVT::f16 &&
3319  !Subtarget->hasFullFP16()) {
3320  MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
3321  SDLoc dl(Op);
3322  return DAG.getNode(
3323  Op.getOpcode(), dl, Op.getValueType(),
3324  DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
3325  }
3326 
3327  uint64_t VTSize = VT.getFixedSizeInBits();
3328  uint64_t InVTSize = InVT.getFixedSizeInBits();
3329  if (VTSize < InVTSize) {
3330  SDLoc dl(Op);
3331  SDValue Cv =
3332  DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
3333  Op.getOperand(0));
3334  return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
3335  }
3336 
3337  if (VTSize > InVTSize) {
3338  SDLoc dl(Op);
3339  MVT ExtVT =
3340  MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
3341  VT.getVectorNumElements());
3342  SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
3343  return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
3344  }
3345 
3346  // Type changing conversions are illegal.
3347  return Op;
3348 }
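// For example, a v4f32 -> v4i16 fptosi takes the "VTSize < InVTSize" path
// above: convert in the wider integer type, then truncate, e.g.:
//   fcvtzs v0.4s, v0.4s
//   xtn    v0.4h, v0.4s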
3349 
3350 SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
3351  SelectionDAG &DAG) const {
3352  bool IsStrict = Op->isStrictFPOpcode();
3353  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3354 
3355  if (SrcVal.getValueType().isVector())
3356  return LowerVectorFP_TO_INT(Op, DAG);
3357 
3358  // f16 conversions are promoted to f32 when full fp16 is not supported.
3359  if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
3360  assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3361  SDLoc dl(Op);
3362  return DAG.getNode(
3363  Op.getOpcode(), dl, Op.getValueType(),
3364  DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
3365  }
3366 
3367  if (SrcVal.getValueType() != MVT::f128) {
3368  // It's legal except when f128 is involved
3369  return Op;
3370  }
3371 
3372  return SDValue();
3373 }
3374 
3375 SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
3376  SelectionDAG &DAG) const {
3377  // AArch64 FP-to-int conversions saturate to the destination register size, so
3378  // we can lower common saturating conversions to simple instructions.
3379  SDValue SrcVal = Op.getOperand(0);
3380 
3381  EVT SrcVT = SrcVal.getValueType();
3382  EVT DstVT = Op.getValueType();
3383 
3384  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3385  uint64_t SatWidth = SatVT.getScalarSizeInBits();
3386  uint64_t DstWidth = DstVT.getScalarSizeInBits();
3387  assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
3388 
3389  // TODO: Support lowering of NEON and SVE conversions.
3390  if (SrcVT.isVector())
3391  return SDValue();
3392 
3393  // TODO: Saturate to SatWidth explicitly.
3394  if (SatWidth != DstWidth)
3395  return SDValue();
3396 
3397  // In the absence of full FP16 support, promote f16 to f32, like LowerFP_TO_INT().
3398  if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
3399  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
3400  DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal),
3401  Op.getOperand(1));
3402 
3403  // Cases that we can emit directly.
3404  if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
3405  (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
3406  (DstVT == MVT::i64 || DstVT == MVT::i32))
3407  return Op;
3408 
3409  // For all other cases, fall back on the expanded form.
3410  return SDValue();
3411 }
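// For example, an f64 -> i32 llvm.fptosi.sat needs no expansion at all,
// since FCVTZS already saturates to the destination register width:
//   fcvtzs w0, d0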
3412 
3413 SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
3414  SelectionDAG &DAG) const {
3415  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3416  // Any additional optimization in this function should be recorded
3417  // in the cost tables.
3418  EVT VT = Op.getValueType();
3419  SDLoc dl(Op);
3420  SDValue In = Op.getOperand(0);
3421  EVT InVT = In.getValueType();
3422  unsigned Opc = Op.getOpcode();
3423  bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
3424 
3425  if (VT.isScalableVector()) {
3426  if (InVT.getVectorElementType() == MVT::i1) {
3427  // We can't directly extend an SVE predicate; extend it first.
3428  unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3429  EVT CastVT = getPromotedVTForPredicate(InVT);
3430  In = DAG.getNode(CastOpc, dl, CastVT, In);
3431  return DAG.getNode(Opc, dl, VT, In);
3432  }
3433 
3434  unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
3435  : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
3436  return LowerToPredicatedOp(Op, DAG, Opcode);
3437  }
3438 
3439  if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3440  return LowerFixedLengthIntToFPToSVE(Op, DAG);
3441 
3442  uint64_t VTSize = VT.getFixedSizeInBits();
3443  uint64_t InVTSize = InVT.getFixedSizeInBits();
3444  if (VTSize < InVTSize) {
3445  MVT CastVT =
3446  MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
3447  InVT.getVectorNumElements());
3448  In = DAG.getNode(Opc, dl, CastVT, In);
3449  return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
3450  }
3451 
3452  if (VTSize > InVTSize) {
3453  unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3454  EVT CastVT = VT.changeVectorElementTypeToInteger();
3455  In = DAG.getNode(CastOpc, dl, CastVT, In);
3456  return DAG.getNode(Opc, dl, VT, In);
3457  }
3458 
3459  return Op;
3460 }
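// For example, a v4i16 -> v4f32 sitofp takes the "VTSize > InVTSize" path
// above: sign-extend to v4i32 first, then convert, e.g.:
//   sshll v0.4s, v0.4h, #0
//   scvtf v0.4s, v0.4s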
3461 
3462 SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
3463  SelectionDAG &DAG) const {
3464  if (Op.getValueType().isVector())
3465  return LowerVectorINT_TO_FP(Op, DAG);
3466 
3467  bool IsStrict = Op->isStrictFPOpcode();
3468  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3469 
3470  // f16 conversions are promoted to f32 when full fp16 is not supported.
3471  if (Op.getValueType() == MVT::f16 &&
3472  !Subtarget->hasFullFP16()) {
3473  assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3474  SDLoc dl(Op);
3475  return DAG.getNode(
3476  ISD::FP_ROUND, dl, MVT::f16,
3477  DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
3478  DAG.getIntPtrConstant(0, dl));
3479  }
3480 
3481  // i128 conversions are libcalls.
3482  if (SrcVal.getValueType() == MVT::i128)
3483  return SDValue();
3484 
3485  // Other conversions are legal, unless it's to the completely software-based
3486  // fp128.
3487  if (Op.getValueType() != MVT::f128)
3488  return Op;
3489  return SDValue();
3490 }
3491 
3492 SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
3493  SelectionDAG &DAG) const {
3494  // For iOS, we want to call an alternative entry point: __sincos_stret,
3495  // which returns the values in two S / D registers.
3496  SDLoc dl(Op);
3497  SDValue Arg = Op.getOperand(0);
3498  EVT ArgVT = Arg.getValueType();
3499  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
3500 
3501  ArgListTy Args;
3502  ArgListEntry Entry;
3503 
3504  Entry.Node = Arg;
3505  Entry.Ty = ArgTy;
3506  Entry.IsSExt = false;
3507  Entry.IsZExt = false;
3508  Args.push_back(Entry);
3509 
3510  RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
3511  : RTLIB::SINCOS_STRET_F32;
3512  const char *LibcallName = getLibcallName(LC);
3513  SDValue Callee =
3514  DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
3515 
3516  StructType *RetTy = StructType::get(ArgTy, ArgTy);
3517  TargetLowering::CallLoweringInfo CLI(DAG);
3518  CLI.setDebugLoc(dl)
3519  .setChain(DAG.getEntryNode())
3520  .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
3521 
3522  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3523  return CallResult.first;
3524 }
3525 
3526 static MVT getSVEContainerType(EVT ContentTy);
3527 
3528 SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
3529  SelectionDAG &DAG) const {
3530  EVT OpVT = Op.getValueType();
3531  EVT ArgVT = Op.getOperand(0).getValueType();
3532 
3533  if (useSVEForFixedLengthVectorVT(OpVT))
3534  return LowerFixedLengthBitcastToSVE(Op, DAG);
3535 
3536  if (OpVT.isScalableVector()) {
3537  if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
3538  assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
3539  "Expected int->fp bitcast!");
3540  SDValue ExtResult =
3541  DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
3542  Op.getOperand(0));
3543  return getSVESafeBitCast(OpVT, ExtResult, DAG);
3544  }
3545  return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
3546  }
3547 
3548  if (OpVT != MVT::f16 && OpVT != MVT::bf16)
3549  return SDValue();
3550 
3551  assert(ArgVT == MVT::i16);
3552  SDLoc DL(Op);
3553 
3554  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
3555  Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
3556  return SDValue(
3557  DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
3558  DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
3559  0);
3560 }
3561 
3562 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
3563  if (OrigVT.getSizeInBits() >= 64)
3564  return OrigVT;
3565 
3566  assert(OrigVT.isSimple() && "Expecting a simple value type");
3567 
3568  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
3569  switch (OrigSimpleTy) {
3570  default: llvm_unreachable("Unexpected Vector Type");
3571  case MVT::v2i8:
3572  case MVT::v2i16:
3573  return MVT::v2i32;
3574  case MVT::v4i8:
3575  return MVT::v4i16;
3576  }
3577 }
3578 
3579 static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
3580  const EVT &OrigTy,
3581  const EVT &ExtTy,
3582  unsigned ExtOpcode) {
3583  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
3584  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
3585  // 64-bits we need to insert a new extension so that it will be 64-bits.
3586  assert(ExtTy.is128BitVector() && "Unexpected extension size");
3587  if (OrigTy.getSizeInBits() >= 64)
3588  return N;
3589 
3590  // Must extend size to at least 64 bits to be used as an operand for VMULL.
3591  EVT NewVT = getExtensionTo64Bits(OrigTy);
3592 
3593  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
3594 }
3595 
3596 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
3597  bool isSigned) {
3598  EVT VT = N->getValueType(0);
3599 
3600  if (N->getOpcode() != ISD::BUILD_VECTOR)
3601  return false;
3602 
3603  for (const SDValue &Elt : N->op_values()) {
3604  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
3605  unsigned EltSize = VT.getScalarSizeInBits();
3606  unsigned HalfSize = EltSize / 2;
3607  if (isSigned) {
3608  if (!isIntN(HalfSize, C->getSExtValue()))
3609  return false;
3610  } else {
3611  if (!isUIntN(HalfSize, C->getZExtValue()))
3612  return false;
3613  }
3614  continue;
3615  }
3616  return false;
3617  }
3618 
3619  return true;
3620 }
3621 
3622 static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
3623  if (N->getOpcode() == ISD::SIGN_EXTEND ||
3624  N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
3625  return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
3626  N->getOperand(0)->getValueType(0),
3627  N->getValueType(0),
3628  N->getOpcode());
3629 
3630  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
3631  EVT VT = N->getValueType(0);
3632  SDLoc dl(N);
3633  unsigned EltSize = VT.getScalarSizeInBits() / 2;
3634  unsigned NumElts = VT.getVectorNumElements();
3635  MVT TruncVT = MVT::getIntegerVT(EltSize);
3636  SmallVector<SDValue, 8> Ops;
3637  for (unsigned i = 0; i != NumElts; ++i) {
3638  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
3639  const APInt &CInt = C->getAPIntValue();
3640  // Element types smaller than 32 bits are not legal, so use i32 elements.
3641  // The values are implicitly truncated so sext vs. zext doesn't matter.
3642  Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
3643  }
3644  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
3645 }
3646 
3647 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
3648  return N->getOpcode() == ISD::SIGN_EXTEND ||
3649  N->getOpcode() == ISD::ANY_EXTEND ||
3650  isExtendedBUILD_VECTOR(N, DAG, true);
3651 }
3652 
3653 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
3654  return N->getOpcode() == ISD::ZERO_EXTEND ||
3655  N->getOpcode() == ISD::ANY_EXTEND ||
3656  isExtendedBUILD_VECTOR(N, DAG, false);
3657 }
3658 
3659 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
3660  unsigned Opcode = N->getOpcode();
3661  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3662  SDNode *N0 = N->getOperand(0).getNode();
3663  SDNode *N1 = N->getOperand(1).getNode();
3664  return N0->hasOneUse() && N1->hasOneUse() &&
3665  isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
3666  }
3667  return false;
3668 }
3669 
3670 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
3671  unsigned Opcode = N->getOpcode();
3672  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3673  SDNode *N0 = N->getOperand(0).getNode();
3674  SDNode *N1 = N->getOperand(1).getNode();
3675  return N0->hasOneUse() && N1->hasOneUse() &&
3676  isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
3677  }
3678  return false;
3679 }
3680 
3681 SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3682  SelectionDAG &DAG) const {
3683  // The rounding mode is in bits 23:22 of the FPCR.
3684  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
3685  // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
3686  // so that the shift + and get folded into a bitfield extract.
3687  SDLoc dl(Op);
3688 
3689  SDValue Chain = Op.getOperand(0);
3690  SDValue FPCR_64 = DAG.getNode(
3691  ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
3692  {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
3693  Chain = FPCR_64.getValue(1);
3694  SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
3695  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
3696  DAG.getConstant(1U << 22, dl, MVT::i32));
3697  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3698  DAG.getConstant(22, dl, MVT::i32));
3699  SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3700  DAG.getConstant(3, dl, MVT::i32));
3701  return DAG.getMergeValues({AND, Chain}, dl);
3702 }
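// For example, the sequence above may be selected as (with the shift and the
// AND folding into a bitfield extract):
//   mrs  x8, FPCR
//   add  w9, w8, #1024, lsl #12   ; w9 = w8 + (1 << 22)
//   ubfx w0, w9, #22, #2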
3703 
3704 SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
3705  SelectionDAG &DAG) const {
3706  SDLoc DL(Op);
3707  SDValue Chain = Op->getOperand(0);
3708  SDValue RMValue = Op->getOperand(1);
3709 
3710  // The rounding mode is in bits 23:22 of the FPCR.
3711  // The llvm.set.rounding argument value to the rounding mode in FPCR mapping
3712  // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
3713  // ((arg - 1) & 3) << 22).
3714  //
3715  // The argument of llvm.set.rounding must be within the segment [0, 3], so
3716  // NearestTiesToAway (4) is not handled here. It is the responsibility of
3717  // the code that generated the llvm.set.rounding call to ensure this condition.
3718 
3719  // Calculate new value of FPCR[23:22].
3720  RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
3721  DAG.getConstant(1, DL, MVT::i32));
3722  RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
3723  DAG.getConstant(0x3, DL, MVT::i32));
3724  RMValue =
3725  DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
3726  DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
3727  RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
3728 
3729  // Get current value of FPCR.
3730  SDValue Ops[] = {
3731  Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
3732  SDValue FPCR =
3733  DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
3734  Chain = FPCR.getValue(1);
3735  FPCR = FPCR.getValue(0);
3736 
3737  // Put the new rounding mode into FPCR[23:22].
3738  const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
3739  FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
3740  DAG.getConstant(RMMask, DL, MVT::i64));
3741  FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
3742  SDValue Ops2[] = {
3743  Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
3744  FPCR};
3745  return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
3746 }
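// For example, llvm.set.rounding(2) (round toward +infinity) maps to the
// FPCR rounding field ((2 - 1) & 3) = 1, so FPCR[23:22] is rewritten to 0b01
// by the read/and/or/write sequence built above.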
3747 
3748 SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
3749  EVT VT = Op.getValueType();
3750 
3751  // If SVE is available then i64 vector multiplications can also be made legal.
3752  bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
3753 
3754  if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
3755  return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
3756 
3757  // Multiplications are only custom-lowered for 128-bit vectors so that
3758  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
3759  assert(VT.is128BitVector() && VT.isInteger() &&
3760  "unexpected type for custom-lowering ISD::MUL");
3761  SDNode *N0 = Op.getOperand(0).getNode();
3762  SDNode *N1 = Op.getOperand(1).getNode();
3763  unsigned NewOpc = 0;
3764  bool isMLA = false;
3765  bool isN0SExt = isSignExtended(N0, DAG);
3766  bool isN1SExt = isSignExtended(N1, DAG);
3767  if (isN0SExt && isN1SExt)
3768  NewOpc = AArch64ISD::SMULL;
3769  else {
3770  bool isN0ZExt = isZeroExtended(N0, DAG);
3771  bool isN1ZExt = isZeroExtended(N1, DAG);
3772  if (isN0ZExt && isN1ZExt)
3773  NewOpc = AArch64ISD::UMULL;
3774  else if (isN1SExt || isN1ZExt) {
3775  // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
3776  // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
3777  if (isN1SExt && isAddSubSExt(N0, DAG)) {
3778  NewOpc = AArch64ISD::SMULL;
3779  isMLA = true;
3780  } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
3781  NewOpc = AArch64ISD::UMULL;
3782  isMLA = true;
3783  } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
3784  std::swap(N0, N1);
3785  NewOpc = AArch64ISD::UMULL;
3786  isMLA = true;
3787  }
3788  }
3789 
3790  if (!NewOpc) {
3791  if (VT == MVT::v2i64)
3792  // Fall through to expand this. It is not legal.
3793  return SDValue();
3794  else
3795  // Other vector multiplications are legal.
3796  return Op;
3797  }
3798  }
3799 
3800  // Legalize to an S/UMULL instruction
3801  SDLoc DL(Op);
3802  SDValue Op0;
3803  SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
3804  if (!isMLA) {
3805  Op0 = skipExtensionForVectorMULL(N0, DAG);
3806  assert(Op0.getValueType().is64BitVector() &&
3807  Op1.getValueType().is64BitVector() &&
3808  "unexpected types for extended operands to VMULL");
3809  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
3810  }
3811  // Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
3812  // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
3813  // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
3814  SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
3815  SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
3816  EVT Op1VT = Op1.getValueType();
3817  return DAG.getNode(N0->getOpcode(), DL, VT,
3818  DAG.getNode(NewOpc, DL, VT,
3819  DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
3820  DAG.getNode(NewOpc, DL, VT,
3821  DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
3822 }
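// For example, multiplying two v8i16 values that are sign extensions of v8i8
// operands can use a single widening multiply:
//   smull v0.8h, v0.8b, v1.8b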
3823 
3824 static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
3825  int Pattern) {
3826  return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
3827  DAG.getTargetConstant(Pattern, DL, MVT::i32));
3828 }
3829 
3830 static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
3831  SDLoc DL(Op);
3832  EVT OutVT = Op.getValueType();
3833  SDValue InOp = Op.getOperand(1);
3834  EVT InVT = InOp.getValueType();
3835 
3836  // Return the operand if the cast isn't changing type,
3837  // i.e. <n x 16 x i1> -> <n x 16 x i1>
3838  if (InVT == OutVT)
3839  return InOp;
3840 
3841  SDValue Reinterpret =
3842  DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp);
3843 
3844  // If the argument converted to an svbool is a ptrue or a comparison, the
3845  // lanes introduced by the widening are zero by construction.
3846  switch (InOp.getOpcode()) {
3847  case AArch64ISD::SETCC_MERGE_ZERO:
3848  return Reinterpret;
3849  case ISD::INTRINSIC_WO_CHAIN:
3850  if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
3851  return Reinterpret;
3852  }
3853 
3854  // Otherwise, zero the newly introduced lanes.
3855  SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
3856  SDValue MaskReinterpret =
3857  DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask);
3858  return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
3859 }
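// For example, widening an nxv4i1 predicate to an nxv16i1 svbool may require
// the newly introduced lanes to be cleared, conceptually:
//   ptrue p1.s                      ; mask covering only the nxv4i1 lanes
//   and   p0.b, p0/z, p0.b, p1.b    ; zero the extra lanes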
3860 
3861 SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3862  SelectionDAG &DAG) const {
3863  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3864  SDLoc dl(Op);
3865  switch (IntNo) {
3866  default: return SDValue(); // Don't custom lower most intrinsics.
3867  case Intrinsic::thread_pointer: {
3868  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3869  return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
3870  }
3871  case Intrinsic::aarch64_neon_abs: {
3872  EVT Ty = Op.getValueType();
3873  if (Ty == MVT::i64) {
3874  SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
3875  Op.getOperand(1));
3876  Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
3877  return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
3878  } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
3879  return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
3880  } else {
3881  report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
3882  }
3883  }
3884  case Intrinsic::aarch64_neon_smax:
3885  return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
3886  Op.getOperand(1), Op.getOperand(2));
3887  case Intrinsic::aarch64_neon_umax:
3888  return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
3889  Op.getOperand(1), Op.getOperand(2));
3890  case Intrinsic::aarch64_neon_smin:
3891  return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
3892  Op.getOperand(1), Op.getOperand(2));
3893  case Intrinsic::aarch64_neon_umin:
3894  return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
3895  Op.getOperand(1), Op.getOperand(2));
3896 
3897  case Intrinsic::aarch64_sve_sunpkhi:
3898  return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
3899  Op.getOperand(1));
3900  case Intrinsic::aarch64_sve_sunpklo:
3901  return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
3902  Op.getOperand(1));
3903  case Intrinsic::aarch64_sve_uunpkhi:
3904  return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
3905  Op.getOperand(1));
3906  case Intrinsic::aarch64_sve_uunpklo:
3907  return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
3908  Op.getOperand(1));
3909  case Intrinsic::aarch64_sve_clasta_n:
3910  return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
3911  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3912  case Intrinsic::aarch64_sve_clastb_n:
3913  return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
3914  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3915  case Intrinsic::aarch64_sve_lasta:
3916  return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
3917  Op.getOperand(1), Op.getOperand(2));
3918  case Intrinsic::aarch64_sve_lastb:
3919  return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
3920  Op.getOperand(1), Op.getOperand(2));
3921  case Intrinsic::aarch64_sve_rev:
3922  return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
3923  Op.getOperand(1));
3924  case Intrinsic::aarch64_sve_tbl:
3925  return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
3926  Op.getOperand(1), Op.getOperand(2));
3927  case Intrinsic::aarch64_sve_trn1:
3928  return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
3929  Op.getOperand(1), Op.getOperand(2));
3930  case Intrinsic::aarch64_sve_trn2:
3931  return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
3932  Op.getOperand(1), Op.getOperand(2));
3933  case Intrinsic::aarch64_sve_uzp1:
3934  return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
3935  Op.getOperand(1), Op.getOperand(2));
3936  case Intrinsic::aarch64_sve_uzp2:
3937  return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
3938  Op.getOperand(1), Op.getOperand(2));
3939  case Intrinsic::aarch64_sve_zip1:
3940  return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
3941  Op.getOperand(1), Op.getOperand(2));
3942  case Intrinsic::aarch64_sve_zip2:
3943  return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
3944  Op.getOperand(1), Op.getOperand(2));
3945  case Intrinsic::aarch64_sve_splice:
3946  return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
3947  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3948  case Intrinsic::aarch64_sve_ptrue:
3949  return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
3950  Op.getOperand(1));
3951  case Intrinsic::aarch64_sve_clz:
3952  return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
3953  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3954  case Intrinsic::aarch64_sve_cnt: {
3955  SDValue Data = Op.getOperand(3);
3956  // CTPOP only supports integer operands.
3957  if (Data.getValueType().isFloatingPoint())
3958  Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
3959  return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
3960  Op.getOperand(2), Data, Op.getOperand(1));
3961  }
3962  case Intrinsic::aarch64_sve_dupq_lane:
3963  return LowerDUPQLane(Op, DAG);
3964  case Intrinsic::aarch64_sve_convert_from_svbool:
3965  return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
3966  Op.getOperand(1));
3967  case Intrinsic::aarch64_sve_convert_to_svbool:
3968  return lowerConvertToSVBool(Op, DAG);
3969  case Intrinsic::aarch64_sve_fneg:
3970  return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
3971  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3972  case Intrinsic::aarch64_sve_frintp:
3973  return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
3974  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3975  case Intrinsic::aarch64_sve_frintm:
3976  return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
3977  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3978  case Intrinsic::aarch64_sve_frinti:
3979  return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
3980  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3981  case Intrinsic::aarch64_sve_frintx:
3982  return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
3983  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3984  case Intrinsic::aarch64_sve_frinta:
3985  return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
3986  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3987  case Intrinsic::aarch64_sve_frintn:
3988  return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
3989  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3990  case Intrinsic::aarch64_sve_frintz:
3991  return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
3992  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3993  case Intrinsic::aarch64_sve_ucvtf:
3994  return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
3995  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3996  Op.getOperand(1));
3997  case Intrinsic::aarch64_sve_scvtf:
3998  return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
3999  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4000  Op.getOperand(1));
4001  case Intrinsic::aarch64_sve_fcvtzu:
4002  return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
4003  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4004  Op.getOperand(1));
4005  case Intrinsic::aarch64_sve_fcvtzs:
4006  return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
4007  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4008  Op.getOperand(1));
4009  case Intrinsic::aarch64_sve_fsqrt:
4010  return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
4011  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4012  case Intrinsic::aarch64_sve_frecpx:
4013  return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
4014  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4015  case Intrinsic::aarch64_sve_fabs:
4016  return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4017  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4018  case Intrinsic::aarch64_sve_abs:
4019  return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4020  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4021  case Intrinsic::aarch64_sve_neg:
4022  return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4023  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4024  case Intrinsic::aarch64_sve_insr: {
4025  SDValue Scalar = Op.getOperand(2);
4026  EVT ScalarTy = Scalar.getValueType();
4027  if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
4028  Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
4029 
4030  return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
4031  Op.getOperand(1), Scalar);
4032  }
4033  case Intrinsic::aarch64_sve_rbit:
4034  return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
4035  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4036  Op.getOperand(1));
4037  case Intrinsic::aarch64_sve_revb:
4038  return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
4039  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4040  case Intrinsic::aarch64_sve_sxtb:
4041  return DAG.getNode(
4042  AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4043  Op.getOperand(2), Op.getOperand(3),
4044  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4045  Op.getOperand(1));
4046  case Intrinsic::aarch64_sve_sxth:
4047  return DAG.getNode(
4048  AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4049  Op.getOperand(2), Op.getOperand(3),
4050  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4051  Op.getOperand(1));
4052  case Intrinsic::aarch64_sve_sxtw:
4053  return DAG.getNode(
4054  AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4055  Op.getOperand(2), Op.getOperand(3),
4056  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4057  Op.getOperand(1));
4058  case Intrinsic::aarch64_sve_uxtb:
4059  return DAG.getNode(
4060  AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4061  Op.getOperand(2), Op.getOperand(3),
4062  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4063  Op.getOperand(1));
4064  case Intrinsic::aarch64_sve_uxth:
4065  return DAG.getNode(
4066  AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4067  Op.getOperand(2), Op.getOperand(3),
4068  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4069  Op.getOperand(1));
4070  case Intrinsic::aarch64_sve_uxtw:
4071  return DAG.getNode(
4072  AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4073  Op.getOperand(2), Op.getOperand(3),
4074  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4075  Op.getOperand(1));
4076 
4077  case Intrinsic::localaddress: {
4078  const auto &MF = DAG.getMachineFunction();
4079  const auto *RegInfo = Subtarget->getRegisterInfo();
4080  unsigned Reg = RegInfo->getLocalAddressRegister(MF);
4081  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
4082  Op.getSimpleValueType());
4083  }
4084 
4085  case Intrinsic::eh_recoverfp: {
4086  // FIXME: This needs to be implemented to correctly handle highly aligned
4087  // stack objects. For now we simply return the incoming FP. Refer D53541
4088  // for more details.
4089  SDValue FnOp = Op.getOperand(1);
4090  SDValue IncomingFPOp = Op.getOperand(2);
4091  GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
4092  auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
4093  if (!Fn)
4094  report_fatal_error(
4095  "llvm.eh.recoverfp must take a function as the first argument");
4096  return IncomingFPOp;
4097  }
4098 
4099  case Intrinsic::aarch64_neon_vsri:
4100  case Intrinsic::aarch64_neon_vsli: {
4101  EVT Ty = Op.getValueType();
4102 
4103  if (!Ty.isVector())
4104  report_fatal_error("Unexpected type for aarch64_neon_vsli");
4105 
4106  assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
4107 
4108  bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
4109  unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
4110  return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
4111  Op.getOperand(3));
4112  }
4113 
4114  case Intrinsic::aarch64_neon_srhadd:
4115  case Intrinsic::aarch64_neon_urhadd:
4116  case Intrinsic::aarch64_neon_shadd:
4117  case Intrinsic::aarch64_neon_uhadd: {
4118  bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4119  IntNo == Intrinsic::aarch64_neon_shadd);
4120  bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4121  IntNo == Intrinsic::aarch64_neon_urhadd);
4122  unsigned Opcode =
4123  IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
4124  : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
4125  return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4126  Op.getOperand(2));
4127  }
4128  case Intrinsic::aarch64_neon_sabd:
4129  case Intrinsic::aarch64_neon_uabd: {
4130  unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
4131  : ISD::ABDS;
4132  return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4133  Op.getOperand(2));
4134  }
4135  case Intrinsic::aarch64_neon_uaddlp: {
4136  unsigned Opcode = AArch64ISD::UADDLP;
4137  return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
4138  }
4139  case Intrinsic::aarch64_neon_sdot:
4140  case Intrinsic::aarch64_neon_udot:
4141  case Intrinsic::aarch64_sve_sdot:
4142  case Intrinsic::aarch64_sve_udot: {
4143  unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
4144  IntNo == Intrinsic::aarch64_sve_udot)
4145  ? AArch64ISD::UDOT
4146  : AArch64ISD::SDOT;
4147  return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4148  Op.getOperand(2), Op.getOperand(3));
4149  }
4150  }
4151 }
4152 
4153 bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
4154  if (VT.getVectorElementType() == MVT::i8 ||
4155  VT.getVectorElementType() == MVT::i16) {
4156  EltTy = MVT::i32;
4157  return true;
4158  }
4159  return false;
4160 }
4161 
4162 bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
4163  if (VT.getVectorElementType() == MVT::i32 &&
4164  VT.getVectorElementCount().getKnownMinValue() >= 4 &&
4165  !VT.isFixedLengthVector())
4166  return true;
4167 
4168  return false;
4169 }
4170 
4171 bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
4172  return ExtVal.getValueType().isScalableVector();
4173 }
4174 
4175 unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4176  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4177  {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4178  AArch64ISD::GLD1_MERGE_ZERO},
4179  {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4180  AArch64ISD::GLD1_UXTW_MERGE_ZERO},
4181  {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4182  AArch64ISD::GLD1_MERGE_ZERO},
4183  {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4184  AArch64ISD::GLD1_SXTW_MERGE_ZERO},
4185  {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4186  AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4187  {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4188  AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
4189  {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4190  AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4191  {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4192  AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
4193  };
4194  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4195  return AddrModes.find(Key)->second;
4196 }
4197 
4198 unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4199  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4200  {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4201  AArch64ISD::SST1_PRED},
4202  {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4203  AArch64ISD::SST1_UXTW_PRED},
4204  {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4205  AArch64ISD::SST1_PRED},
4206  {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4207  AArch64ISD::SST1_SXTW_PRED},
4208  {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4209  AArch64ISD::SST1_SCALED_PRED},
4210  {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4211  AArch64ISD::SST1_UXTW_SCALED_PRED},
4212  {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4213  AArch64ISD::SST1_SCALED_PRED},
4214  {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4215  AArch64ISD::SST1_SXTW_SCALED_PRED},
4216  };
4217  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4218  return AddrModes.find(Key)->second;
4219 }
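// For example, an unscaled gather whose 32-bit indices must be sign-extended
// maps to GLD1_SXTW_MERGE_ZERO, which may be selected as something like:
//   ld1w { z0.s }, p0/z, [x0, z1.s, sxtw]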
4220 
4221 unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
4222  switch (Opcode) {
4223  default:
4224  llvm_unreachable("unimplemented opcode");
4225  return Opcode;
4226  case AArch64ISD::GLD1_MERGE_ZERO:
4227  return AArch64ISD::GLD1S_MERGE_ZERO;
4228  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
4229  return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
4230  case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
4231  return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
4232  case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
4233  return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
4234  case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
4235  return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
4236  case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
4237  return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
4238  case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
4239  return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
4240  }
4241 }
4242 
4243 static bool getGatherScatterIndexIsExtended(SDValue Index) {
4244  unsigned Opcode = Index.getOpcode();
4245  if (Opcode == ISD::SIGN_EXTEND_INREG)
4246  return true;
4247 
4248  if (Opcode == ISD::AND) {
4249  SDValue Splat = Index.getOperand(1);
4250  if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
4251  return false;
4252  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
4253  if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
4254  return false;
4255  return true;
4256  }
4257 
4258  return false;
4259 }
4260 
4261 // If the base pointer of a masked gather or scatter is null, we
4262 // may be able to swap BasePtr & Index and use the vector + register
4263 // or vector + immediate addressing mode, e.g.
4264 // VECTOR + REGISTER:
4265 // getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
4266 // -> getelementptr %offset, <vscale x N x T> %indices
4267 // VECTOR + IMMEDIATE:
4268 // getelementptr nullptr, <vscale x N x T> (splat(#x) + %indices)
4269 // -> getelementptr #x, <vscale x N x T> %indices
4270 static void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
4271  unsigned &Opcode, bool IsGather,
4272  SelectionDAG &DAG) {
4273  if (!isNullConstant(BasePtr))
4274  return;
4275 
4276  // FIXME: This will not match for fixed vector type codegen as the nodes in
4277  // question will have fixed<->scalable conversions around them. This should be
4278  // moved to a DAG combine or complex pattern so that it executes after all of
4279  // the fixed vector inserts and extracts have been removed. This deficiency
4280  // will result in a sub-optimal addressing mode being used, i.e. an ADD not
4281  // being folded into the scatter/gather.
4282  ConstantSDNode *Offset = nullptr;
4283  if (Index.getOpcode() == ISD::ADD)
4284  if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
4285  if (isa<ConstantSDNode>(SplatVal))
4286  Offset = cast<ConstantSDNode>(SplatVal);
4287  else {
4288  BasePtr = SplatVal;
4289  Index = Index->getOperand(0);
4290  return;
4291  }
4292  }
4293 
4294  unsigned NewOp =
4295  IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
4296 
4297  if (!Offset) {
4298  std::swap(BasePtr, Index);
4299  Opcode = NewOp;
4300  return;
4301  }
4302 
4303  uint64_t OffsetVal = Offset->getZExtValue();
4304  unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
4305  auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
4306 
4307  if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
4308  // Index is out of range for the immediate addressing mode
4309  BasePtr = ConstOffset;
4310  Index = Index->getOperand(0);
4311  return;
4312  }
4313 
4314  // Immediate is in range
4315  Opcode = NewOp;
4316  BasePtr = Index->getOperand(0);
4317  Index = ConstOffset;
4318 }
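// For example, with a null base and an index of the form splat(16) + z1.d
// over 4-byte elements, 16 is a multiple of 4 and 16/4 = 4 <= 31, so the
// vector-plus-immediate form can be used, e.g.:
//   ld1w { z0.d }, p0/z, [z1.d, #16]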
4319 
4320 SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
4321  SelectionDAG &DAG) const {
4322  SDLoc DL(Op);
4323  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
4324  assert(MGT && "Can only custom lower gather load nodes");
4325 
4326  bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector();
4327 
4328  SDValue Index = MGT->getIndex();
4329  SDValue Chain = MGT->getChain();
4330  SDValue PassThru = MGT->getPassThru();
4331  SDValue Mask = MGT->getMask();
4332  SDValue BasePtr = MGT->getBasePtr();
4333  ISD::LoadExtType ExtTy = MGT->getExtensionType();
4334 
4335  ISD::MemIndexType IndexType = MGT->getIndexType();
4336  bool IsScaled =
4337  IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4338  bool IsSigned =
4339  IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4340  bool IdxNeedsExtend =
4341  getGatherScatterIndexIsExtended(Index) ||
4342  Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4343  bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
4344 
4345  EVT VT = PassThru.getSimpleValueType();
4346  EVT IndexVT = Index.getSimpleValueType();
4347  EVT MemVT = MGT->getMemoryVT();
4348  SDValue InputVT = DAG.getValueType(MemVT);
4349 
4350  if (VT.getVectorElementType() == MVT::bf16 &&
4351  !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4352  return SDValue();
4353 
4354  if (IsFixedLength) {
4355  assert(Subtarget->useSVEForFixedLengthVectors() &&
4356  "Cannot lower when not using SVE for fixed vectors");
4357  if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
4358  IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4359  MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4360  } else {
4361  MemVT = getContainerForFixedLengthVector(DAG, MemVT);
4362  IndexVT = MemVT.changeTypeToInteger();
4363  }
4364  InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4365  Mask = DAG.getNode(
4366  ISD::SIGN_EXTEND, DL,
4367  VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
4368  }
4369 
4370  if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4371  PassThru = SDValue();
4372 
4373  if (VT.isFloatingPoint() && !IsFixedLength) {
4374  // Handle FP data by using an integer gather and casting the result.
4375  if (PassThru) {
4376  EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4377  PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
4378  }
4379  InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4380  }
4381 
4382  SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);
4383 
4384  if (getGatherScatterIndexIsExtended(Index))
4385  Index = Index.getOperand(0);
4386 
4387  unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
4388  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4389  /*isGather=*/true, DAG);
4390 
4391  if (ResNeedsSignExtend)
4392  Opcode = getSignExtendedGatherOpcode(Opcode);
4393 
4394  if (IsFixedLength) {
4395  if (Index.getSimpleValueType().isFixedLengthVector())
4396  Index = convertToScalableVector(DAG, IndexVT, Index);
4397  if (BasePtr.getSimpleValueType().isFixedLengthVector())
4398  BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4399  Mask = convertFixedMaskToScalableVector(Mask, DAG);
4400  }
4401 
4402  SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT};
4403  SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops);
4404  Chain = Result.getValue(1);
4405 
4406  if (IsFixedLength) {
4407  Result = convertFromScalableVector(
4408  DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()),
4409  Result);
4410  Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result);
4411  Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
4412 
4413  if (PassThru)
4414  Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru);
4415  } else {
4416  if (PassThru)
4417  Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru);
4418 
4419  if (VT.isFloatingPoint())
4420  Result = getSVESafeBitCast(VT, Result, DAG);
4421  }
4422 
4423  return DAG.getMergeValues({Result, Chain}, DL);
4424 }
4425 
4426 SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
4427  SelectionDAG &DAG) const {
4428  SDLoc DL(Op);
4429  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
4430  assert(MSC && "Can only custom lower scatter store nodes");
4431 
4432  bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector();
4433 
4434  SDValue Index = MSC->getIndex();
4435  SDValue Chain = MSC->getChain();
4436  SDValue StoreVal = MSC->getValue();
4437  SDValue Mask = MSC->getMask();
4438  SDValue BasePtr = MSC->getBasePtr();
4439 
4440  ISD::MemIndexType IndexType = MSC->getIndexType();
4441  bool IsScaled =
4442  IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4443  bool IsSigned =
4444  IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4445  bool NeedsExtend =
4446  getGatherScatterIndexIsExtended(Index) ||
4447  Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4448 
4449  EVT VT = StoreVal.getSimpleValueType();
4450  EVT IndexVT = Index.getSimpleValueType();
4451  SDVTList VTs = DAG.getVTList(MVT::Other);
4452  EVT MemVT = MSC->getMemoryVT();
4453  SDValue InputVT = DAG.getValueType(MemVT);
4454 
4455  if (VT.getVectorElementType() == MVT::bf16 &&
4456  !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4457  return SDValue();
4458 
4459  if (IsFixedLength) {
4460  assert(Subtarget->useSVEForFixedLengthVectors() &&
4461  "Cannot lower when not using SVE for fixed vectors");
4462  if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
4463  IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4464  MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4465  } else {
4466  MemVT = getContainerForFixedLengthVector(DAG, MemVT);
4467  IndexVT = MemVT.changeTypeToInteger();
4468  }
4469  InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4470 
4471  StoreVal =
4472  DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal);
4473  StoreVal = DAG.getNode(
4474  ISD::ANY_EXTEND, DL,
4475  VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
4476  StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
4477  Mask = DAG.getNode(
4478  ISD::ZERO_EXTEND, DL,
4479  VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
4480  } else if (VT.isFloatingPoint()) {
4481  // Handle FP data by casting the data so an integer scatter can be used.
4482  EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4483  StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
4484  InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4485  }
4486 
4487  if (getGatherScatterIndexIsExtended(Index))
4488  Index = Index.getOperand(0);
4489 
4490  unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
4491  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4492  /*isGather=*/false, DAG);
4493 
4494  if (IsFixedLength) {
4495  if (Index.getSimpleValueType().isFixedLengthVector())
4496  Index = convertToScalableVector(DAG, IndexVT, Index);
4497  if (BasePtr.getSimpleValueType().isFixedLengthVector())
4498  BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4499  Mask = convertFixedMaskToScalableVector(Mask, DAG);
4500  }
4501 
4502  SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
4503  return DAG.getNode(Opcode, DL, VTs, Ops);
4504 }
4505 
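// Custom lowering of masked loads. Loads whose passthru is undef or zero are
// returned unchanged; any other passthru is peeled off into an explicit
// select over an undef-passthru masked load.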
4506 SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
4507  SDLoc DL(Op);
4508  MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
4509  assert(LoadNode && "Expected custom lowering of a masked load node");
4510  EVT VT = Op->getValueType(0);
4511 
4512  if (useSVEForFixedLengthVectorVT(VT, true))
4513  return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
4514 
4515  SDValue PassThru = LoadNode->getPassThru();
4516  SDValue Mask = LoadNode->getMask();
4517 
4518  if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4519  return Op;
4520 
4521  SDValue Load = DAG.getMaskedLoad(
4522  VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
4523  LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
4524  LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
4525  LoadNode->getExtensionType());
4526 
4527  SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
4528 
4529  return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
4530 }
4531 
4532 // Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
4533  static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
4534  EVT VT, EVT MemVT,
4535  SelectionDAG &DAG) {
4536  assert(VT.isVector() && "VT should be a vector type");
4537  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
4538 
4539  SDValue Value = ST->getValue();
4540 
4541  // It first extends the promoted v4i16 to v8i16, truncates it to v8i8, and
4542  // extracts the word lane which represents the v4i8 subvector. This
4543  // optimizes the store to:
4544  //
4545  // xtn v0.8b, v0.8h
4546  // str s0, [x0]
4547 
4548  SDValue Undef = DAG.getUNDEF(MVT::i16);
4549  SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
4550  {Undef, Undef, Undef, Undef});
4551 
4552  SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
4553  Value, UndefVec);
4554  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
4555 
4556  Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
4557  SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4558  Trunc, DAG.getConstant(0, DL, MVT::i64));
4559 
4560  return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
4561  ST->getBasePtr(), ST->getMemOperand());
4562 }
4563 
4564  // Custom lowering for any store, vector or scalar, default or truncating.
4565  // Currently we only custom lower truncating stores from v4i16 to v4i8 and
4566  // volatile stores of i128.
4567 SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
4568  SelectionDAG &DAG) const {
4569  SDLoc Dl(Op);
4570  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
4571  assert(StoreNode && "Can only custom lower store nodes");
4572 
4573  SDValue Value = StoreNode->getValue();
4574 
4575  EVT VT = Value.getValueType();
4576  EVT MemVT = StoreNode->getMemoryVT();
4577 
4578  if (VT.isVector()) {
4579  if (useSVEForFixedLengthVectorVT(VT, true))
4580  return LowerFixedLengthVectorStoreToSVE(Op, DAG);
4581 
4582  unsigned AS = StoreNode->getAddressSpace();
4583  Align Alignment = StoreNode->getAlign();
4584  if (Alignment < MemVT.getStoreSize() &&
4585  !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
4586  StoreNode->getMemOperand()->getFlags(),
4587  nullptr)) {
4588  return scalarizeVectorStore(StoreNode, DAG);
4589  }
4590 
4591  if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
4592  MemVT == MVT::v4i8) {
4593  return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
4594  }
4595  // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
4596  // the custom lowering, as there are no un-paired non-temporal stores and
4597  // legalization will break up 256 bit inputs.
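  // The value is split into two half-width subvectors below and emitted as a
  // single STNP (non-temporal store pair) node.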
4598  ElementCount EC = MemVT.getVectorElementCount();
4599  if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
4600  EC.isKnownEven() &&
4601  ((MemVT.getScalarSizeInBits() == 8u ||
4602  MemVT.getScalarSizeInBits() == 16u ||
4603  MemVT.getScalarSizeInBits() == 32u ||
4604  MemVT.getScalarSizeInBits() == 64u))) {
4605  SDValue Lo =
4606  DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4607  MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4608  StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
4609  SDValue Hi =
4610  DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4611  MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4612  StoreNode->getValue(),
4613  DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
4614  SDValue Result = DAG.getMemIntrinsicNode(
4615  AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
4616  {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4617  StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4618  return Result;
4619  }
4620  } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
4621  assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
4622  SDValue Lo =
4623  DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4624  DAG.getConstant(0, Dl, MVT::i64));
4625  SDValue Hi =
4626  DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4627  DAG.getConstant(1, Dl, MVT::i64));
4628  SDValue Result = DAG.getMemIntrinsicNode(
4629  AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
4630  {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4631  StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4632  return Result;
4633  } else if (MemVT == MVT::i64x8) {
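  // An LS64 value (Armv8.7-A 64-byte store): split it into eight i64 pieces
  // and store each piece at Base + i * 8.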
4634  SDValue Value = StoreNode->getValue();
4635  assert(Value->getValueType(0) == MVT::i64x8);
4636  SDValue Chain = StoreNode->getChain();
4637  SDValue Base = StoreNode->getBasePtr();
4638  EVT PtrVT = Base.getValueType();
4639  for (unsigned i = 0; i < 8; i++) {
4640  SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
4641  Value, DAG.getConstant(i, Dl, MVT::i32));
4642  SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
4643  DAG.getConstant(i * 8, Dl, PtrVT));
4644  Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
4645  StoreNode->getOriginalAlign());
4646  }
4647  return Chain;
4648  }
4649 
4650  return SDValue();
4651 }
4652 
4653 SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
4654  SelectionDAG &DAG) const {
4655  SDLoc DL(Op);
4656  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
4657  assert(LoadNode && "Expected custom lowering of a load node");
4658 
4659  if (LoadNode->getMemoryVT() == MVT::i64x8) {
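  // An LS64 result (Armv8.7-A 64-byte load): load eight i64 pieces from
  // consecutive offsets and rebuild the i64x8 value from them.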
4660  SmallVector<SDValue, 8> Ops;
4661  SDValue Base = LoadNode->getBasePtr();
4662  SDValue Chain = LoadNode->getChain();
4663  EVT PtrVT = Base.getValueType();
4664  for (unsigned i = 0; i < 8; i++) {
4665  SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
4666  DAG.getConstant(i * 8, DL, PtrVT));
4667  SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
4668  LoadNode->getPointerInfo(),
4669  LoadNode->getOriginalAlign());
4670  Ops.push_back(Part);
4671  Chain = SDValue(Part.getNode(), 1);
4672  }
4673  SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
4674  return DAG.getMergeValues({Loaded, Chain}, DL);
4675  }
4676 
4677  // Custom lowering for extending v4i8 vector loads.
4678  EVT VT = Op->getValueType(0);
4679  assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
4680 
4681  if (LoadNode->getMemoryVT() != MVT::v4i8)
4682  return SDValue();
4683 
4684  unsigned ExtType;
4685  if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
4686  ExtType = ISD::SIGN_EXTEND;
4687  else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
4688  LoadNode->getExtensionType() == ISD::EXTLOAD)
4689  ExtType = ISD::ZERO_EXTEND;
4690  else
4691  return SDValue();
4692 
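  // The four bytes are loaded as a single 32-bit scalar into an FP register,
  // placed in lane 0 of a vector, reinterpreted as v8i8 and widened; the low
  // v4i16 half then holds the extended v4i8 value.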
4693  SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
4694  LoadNode->getBasePtr(), MachinePointerInfo());
4695  SDValue Chain = Load.getValue(1);
4696  SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
4697  SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
4698  SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
4699  Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
4700  DAG.getConstant(0, DL, MVT::i64));
4701  if (VT == MVT::v4i32)
4702  Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
4703  return DAG.getMergeValues({Ext, Chain}, DL);
4704 }
4705 
4706 // Generate SUBS and CSEL for integer abs.
4707 SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
4708  MVT VT = Op.getSimpleValueType();
4709 
4710  if (VT.isVector())
4711  return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
4712 
4713  SDLoc DL(Op);
4714  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4715  Op.getOperand(0));
4716  // Generate SUBS & CSEL.
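  // SUBS computes x - 0 and sets NZCV from the value itself; CSEL then keeps
  // the original value when PL (non-negative) holds and otherwise selects the
  // negation computed above.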
4717  SDValue Cmp =
4718  DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
4719  Op.getOperand(0), DAG.getConstant(0, DL, VT));
4720  return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
4721  DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
4722  Cmp.getValue(1));
4723 }
4724 
4725  SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
4726  SelectionDAG &DAG) const {
4727  LLVM_DEBUG(dbgs() << "Custom lowering: ");
4728  LLVM_DEBUG(Op.dump());
4729 
4730  switch (Op.getOpcode()) {
4731  default:
4732  llvm_unreachable("unimplemented operand");
4733  return SDValue();
4734  case ISD::BITCAST:
4735  return LowerBITCAST(Op, DAG);
4736  case ISD::GlobalAddress:
4737  return LowerGlobalAddress(Op, DAG);
4738  case ISD::GlobalTLSAddress:
4739  return LowerGlobalTLSAddress(Op, DAG);
4740  case ISD::SETCC:
4741  case ISD::STRICT_FSETCC:
4742  case ISD::STRICT_FSETCCS:
4743  return LowerSETCC(Op, DAG);
4744  case ISD::BR_CC:
4745  return LowerBR_CC(Op, DAG);
4746  case ISD::SELECT:
4747  return LowerSELECT(Op, DAG);
4748  case ISD::SELECT_CC:
4749  return LowerSELECT_CC(Op, DAG);
4750  case ISD::JumpTable:
4751  return LowerJumpTable(Op, DAG);
4752  case ISD::BR_JT:
4753  return LowerBR_JT(Op, DAG);
4754  case ISD::ConstantPool:
4755  return LowerConstantPool(Op, DAG);
4756  case ISD::BlockAddress:
4757  return LowerBlockAddress(Op, DAG);
4758  case ISD::VASTART:
4759  return LowerVASTART(Op, DAG);
4760  case ISD::VACOPY:
4761  return LowerVACOPY(Op, DAG);
4762  case ISD::VAARG:
4763  return LowerVAARG(Op, DAG);
4764  case ISD::ADDC:
4765  case ISD::ADDE:
4766  case ISD::SUBC:
4767  case ISD::SUBE:
4768  return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
4769  case ISD::SADDO:
4770  case ISD::UADDO:
4771  case ISD::SSUBO:
4772  case ISD::USUBO:
4773  case ISD::SMULO:
4774  case ISD::UMULO:
4775  return LowerXALUO(Op, DAG);
4776  case ISD::FADD:
4777  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
4778  case ISD::FSUB:
4779  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
4780  case ISD::FMUL:
4781  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
4782  case ISD::FMA:
4783  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
4784  case ISD::FDIV:
4785  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
4786  case ISD::FNEG:
4787  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
4788  case ISD::FCEIL:
4789  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
4790  case ISD::FFLOOR:
4791  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
4792  case ISD::FNEARBYINT:
4793  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
4794  case ISD::FRINT:
4795  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
4796  case ISD::FROUND:
4797  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
4798  case ISD::FROUNDEVEN:
4799  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
4800  case ISD::FTRUNC:
4801  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
4802  case ISD::FSQRT:
4803  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
4804  case ISD::FABS:
4805  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
4806  case ISD::FP_ROUND:
4807  case ISD::STRICT_FP_ROUND:
4808  return LowerFP_ROUND(Op, DAG);
4809  case ISD::FP_EXTEND:
4810  return LowerFP_EXTEND(Op, DAG);
4811  case ISD::FRAMEADDR:
4812  return LowerFRAMEADDR(Op, DAG);
4813  case ISD::SPONENTRY:
4814  return LowerSPONENTRY(Op, DAG);
4815  case ISD::RETURNADDR:
4816  return LowerRETURNADDR(Op, DAG);
4817  case ISD::ADDROFRETURNADDR:
4818  return LowerADDROFRETURNADDR(Op, DAG);
4819  case ISD::CONCAT_VECTORS:
4820  return LowerCONCAT_VECTORS(Op, DAG);
4821  case ISD::INSERT_VECTOR_ELT:
4822  return LowerINSERT_VECTOR_ELT(Op, DAG);
4823  case ISD::EXTRACT_VECTOR_ELT:
4824  return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4825  case ISD::BUILD_VECTOR:
4826  return LowerBUILD_VECTOR(Op, DAG);
4827  case ISD::VECTOR_SHUFFLE:
4828  return LowerVECTOR_SHUFFLE(Op, DAG);
4829  case ISD::SPLAT_VECTOR:
4830  return LowerSPLAT_VECTOR(Op, DAG);
4831  case ISD::EXTRACT_SUBVECTOR:
4832  return LowerEXTRACT_SUBVECTOR(Op, DAG);
4833  case ISD::INSERT_SUBVECTOR:
4834  return LowerINSERT_SUBVECTOR(Op, DAG);
4835  case ISD::SDIV:
4836  case ISD::UDIV:
4837  return LowerDIV(Op, DAG);
4838  case ISD::SMIN:
4839  return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
4840  /*OverrideNEON=*/true);
4841  case ISD::UMIN:
4842  return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
4843  /*OverrideNEON=*/true);
4844  case ISD::SMAX:
4845  return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
4846  /*OverrideNEON=*/true);
4847  case ISD::UMAX:
4848  return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
4849  /*OverrideNEON=*/true);
4850  case ISD::SRA:
4851  case ISD::SRL:
4852  case ISD::SHL:
4853  return LowerVectorSRA_SRL_SHL(Op, DAG);
4854  case ISD::SHL_PARTS:
4855  case ISD::SRL_PARTS:
4856  case ISD::SRA_PARTS:
4857  return LowerShiftParts(Op, DAG);
4858  case ISD::CTPOP:
4859  return LowerCTPOP(Op, DAG);
4860  case ISD::FCOPYSIGN:
4861  return LowerFCOPYSIGN(Op, DAG);
4862  case ISD::OR:
4863  return LowerVectorOR(Op, DAG);
4864  case ISD::XOR:
4865  return LowerXOR(Op, DAG);
4866  case ISD::PREFETCH:
4867  return LowerPREFETCH(Op, DAG);
4868  case ISD::SINT_TO_FP:
4869  case ISD::UINT_TO_FP:
4870  case ISD::STRICT_SINT_TO_FP:
4871  case ISD::STRICT_UINT_TO_FP:
4872  return LowerINT_TO_FP(Op, DAG);
4873  case ISD::FP_TO_SINT:
4874  case ISD::FP_TO_UINT:
4875  case ISD::STRICT_FP_TO_SINT:
4876  case ISD::STRICT_FP_TO_UINT:
4877  return LowerFP_TO_INT(Op, DAG);
4878  case ISD::FP_TO_SINT_SAT:
4879  case ISD::FP_TO_UINT_SAT:
4880  return LowerFP_TO_INT_SAT(Op, DAG);
4881  case ISD::FSINCOS:
4882  return LowerFSINCOS(Op, DAG);
4883  case ISD::FLT_ROUNDS_:
4884  return LowerFLT_ROUNDS_(Op, DAG);
4885  case ISD::SET_ROUNDING:
4886  return LowerSET_ROUNDING(Op, DAG);
4887  case ISD::MUL:
4888  return LowerMUL(Op, DAG);
4889  case ISD::MULHS:
4890  return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
4891  /*OverrideNEON=*/true);
4892  case ISD::MULHU:
4893  return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
4894  /*OverrideNEON=*/true);
4895  case ISD::INTRINSIC_WO_CHAIN:
4896  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4897  case ISD::STORE:
4898  return LowerSTORE(Op, DAG);
4899  case ISD::MSTORE:
4900  return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
4901  case ISD::MGATHER:
4902  return LowerMGATHER(Op, DAG);
4903  case ISD::MSCATTER:
4904  return LowerMSCATTER(Op, DAG);
4905  case ISD::VECREDUCE_SEQ_FADD:
4906  return LowerVECREDUCE_SEQ_FADD(Op, DAG);
4907  case ISD::VECREDUCE_ADD:
4908  case ISD::VECREDUCE_AND:
4909  case ISD::VECREDUCE_OR:
4910  case ISD::VECREDUCE_XOR:
4911  case ISD::VECREDUCE_SMAX:
4912  case ISD::VECREDUCE_SMIN:
4913  case ISD::VECREDUCE_UMAX:
4914  case ISD::VECREDUCE_UMIN:
4915  case ISD::VECREDUCE_FADD:
4916  case ISD::VECREDUCE_FMAX:
4917  case ISD::VECREDUCE_FMIN:
4918  return LowerVECREDUCE(Op, DAG);
4919  case ISD::ATOMIC_LOAD_SUB:
4920  return LowerATOMIC_LOAD_SUB(Op, DAG);
4921  case ISD::ATOMIC_LOAD_AND:
4922  return LowerATOMIC_LOAD_AND(Op, DAG);
4923  case ISD::DYNAMIC_STACKALLOC:
4924  return LowerDYNAMIC_STACKALLOC(Op, DAG);
4925  case ISD::VSCALE:
4926  return LowerVSCALE(Op, DAG);
4927  case ISD::ANY_EXTEND:
4928  case ISD::SIGN_EXTEND:
4929  case ISD::ZERO_EXTEND:
4930  return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
4931  case ISD::SIGN_EXTEND_INREG: {
4932  // Only custom lower when ExtraVT has a legal byte based element type.
4933  EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4934  EVT ExtraEltVT = ExtraVT.getVectorElementType();
4935  if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
4936  (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
4937  return SDValue();
4938 
4939  return LowerToPredicatedOp(Op, DAG,
4940  AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
4941  }
4942  case ISD::TRUNCATE:
4943  return LowerTRUNCATE(Op, DAG);
4944  case ISD::MLOAD:
4945  return LowerMLOAD(Op, DAG);
4946  case ISD::LOAD:
4947  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
4948  return LowerFixedLengthVectorLoadToSVE(Op, DAG);
4949  return LowerLOAD(Op, DAG);
4950  case ISD::ADD:
4951  return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
4952  case ISD::AND:
4953  return LowerToScalableOp(Op, DAG);
4954  case ISD::SUB:
4955  return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
4956  case ISD::FMAXIMUM:
4957  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
4958  case ISD::FMAXNUM:
4959  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
4960  case ISD::FMINIMUM:
4961  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
4962  case ISD::FMINNUM:
4963  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
4964  case ISD::VSELECT:
4965  return LowerFixedLengthVectorSelectToSVE(Op, DAG);
4966  case ISD::ABS:
4967  return LowerABS(Op, DAG);
4968  case ISD::BITREVERSE:
4969  return LowerBitreverse(Op, DAG);
4970  case ISD::BSWAP:
4971  return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
4972  case ISD::CTLZ:
4973  return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
4974  /*OverrideNEON=*/true);
4975  case ISD::CTTZ:
4976  return LowerCTTZ(Op, DAG);
4977  case ISD::VECTOR_SPLICE:
4978  return LowerVECTOR_SPLICE(Op, DAG);
4979  }
4980 }
4981 
4982  bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
4983  return !Subtarget->useSVEForFixedLengthVectors();
4984 }
4985 
4986 bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
4987  EVT VT, bool OverrideNEON) const {
4988  if (!Subtarget->useSVEForFixedLengthVectors())
4989  return false;
4990 
4991  if (!VT.isFixedLengthVector())
4992  return false;
4993 
4994  // Don't use SVE for vectors we cannot scalarize if required.
4995  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
4996  // Fixed length predicates should be promoted to i8.
4997  // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
4998  case MVT::i1:
4999  default:
5000  return false;
5001  case MVT::i8:
5002  case MVT::i16:
5003  case MVT::i32:
5004  case MVT::i64:
5005  case MVT::f16:
5006  case MVT::f32:
5007  case MVT::f64:
5008  break;
5009  }
5010 
5011  // All SVE implementations support NEON sized vectors.
5012  if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
5013  return true;
5014 
5015  // Ensure NEON MVTs only belong to a single register class.
5016  if (VT.getFixedSizeInBits() <= 128)
5017  return false;
5018 
5019  // Don't use SVE for types that don't fit.
5020  if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
5021  return false;
5022 
5023  // TODO: Perhaps an artificial restriction, but worth having whilst getting
5024  // the base fixed length SVE support in place.
5025  if (!VT.isPow2VectorType())
5026  return false;
5027 
5028  return true;
5029 }
5030 
5031 //===----------------------------------------------------------------------===//
5032 // Calling Convention Implementation
5033 //===----------------------------------------------------------------------===//
5034 
5035 /// Selects the correct CCAssignFn for a given CallingConvention value.
5036  CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
5037  bool IsVarArg) const {
5038  switch (CC) {
5039  default:
5040  report_fatal_error("Unsupported calling convention.");
5041  case CallingConv::WebKit_JS:
5042  return CC_AArch64_WebKit_JS;
5043  case CallingConv::GHC:
5044  return CC_AArch64_GHC;
5045  case CallingConv::C:
5046  case CallingConv::Fast:
5047  case CallingConv::PreserveMost:
5048  case CallingConv::CXX_FAST_TLS:
5049  case CallingConv::Swift:
5050  case CallingConv::SwiftTail:
5051  case CallingConv::Tail:
5052  if (Subtarget->isTargetWindows() && IsVarArg)
5053  return CC_AArch64_Win64_VarArg;
5054  if (!Subtarget->isTargetDarwin())
5055  return CC_AArch64_AAPCS;
5056  if (!IsVarArg)
5057  return CC_AArch64_DarwinPCS;
5058  return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
5059  : CC_AArch64_DarwinPCS_VarArg;
5060  case CallingConv::Win64:
5061  return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
5062  case CallingConv::CFGuard_Check:
5063  return CC_AArch64_Win64_CFGuard_Check;
5064  case CallingConv::AArch64_VectorCall:
5065  case CallingConv::AArch64_SVE_VectorCall:
5066  return CC_AArch64_AAPCS;
5067  }
5068 }
5069 
5070 CCAssignFn *
5071  AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
5072  return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
5073  : RetCC_AArch64_AAPCS;
5074  }
5075 
5076 SDValue AArch64TargetLowering::LowerFormalArguments(
5077  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
5078  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5079  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5080  MachineFunction &MF = DAG.getMachineFunction();
5081  MachineFrameInfo &MFI = MF.getFrameInfo();
5082  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5083 
5084  // Assign locations to all of the incoming arguments.
5085  SmallVector<CCValAssign, 16> ArgLocs;
5086  DenseMap<unsigned, SDValue> CopiedRegs;
5087  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
5088  *DAG.getContext());
5089 
5090  // At this point, Ins[].VT may already be promoted to i32. To correctly
5091  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5092  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5093  // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
5094  // we use a special version of AnalyzeFormalArguments to pass in ValVT and
5095  // LocVT.
5096  unsigned NumArgs = Ins.size();
5097  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
5098  unsigned CurArgIdx = 0;
5099  for (unsigned i = 0; i != NumArgs; ++i) {
5100  MVT ValVT = Ins[i].VT;
5101  if (Ins[i].isOrigArg()) {
5102  std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
5103  CurArgIdx = Ins[i].getOrigArgIndex();
5104 
5105  // Get type of the original argument.
5106  EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
5107  /*AllowUnknown*/ true);
5108  MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
5109  // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5110  if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5111  ValVT = MVT::i8;
5112  else if (ActualMVT == MVT::i16)
5113  ValVT = MVT::i16;
5114  }
5115  bool UseVarArgCC = false;
5116  if (IsWin64)
5117  UseVarArgCC = isVarArg;
5118  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5119  bool Res =
5120  AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
5121  assert(!Res && "Call operand has unhandled type");
5122  (void)Res;
5123  }
5124  SmallVector<SDValue, 16> ArgValues;
5125  unsigned ExtraArgLocs = 0;
5126  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5127  CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5128 
5129  if (Ins[i].Flags.isByVal()) {
5130  // Byval is used for HFAs in the PCS, but the system should work in a
5131  // non-compliant manner for larger structs.
5132  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5133  int Size = Ins[i].Flags.getByValSize();
5134  unsigned NumRegs = (Size + 7) / 8;
5135 
5136  // FIXME: This works on big-endian for composite byvals, which are the common
5137  // case. It should also work for fundamental types too.
5138  unsigned FrameIdx =
5139  MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
5140  SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
5141  InVals.push_back(FrameIdxN);
5142 
5143  continue;
5144  }
5145 
5146  if (Ins[i].Flags.isSwiftAsync())
5147  MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5148 
5149  SDValue ArgValue;
5150  if (VA.isRegLoc()) {
5151  // Arguments stored in registers.
5152  EVT RegVT = VA.getLocVT();
5153  const TargetRegisterClass *RC;
5154 
5155  if (RegVT == MVT::i32)
5156  RC = &AArch64::GPR32RegClass;
5157  else if (RegVT == MVT::i64)
5158  RC = &AArch64::GPR64RegClass;
5159  else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
5160  RC = &AArch64::FPR16RegClass;
5161  else if (RegVT == MVT::f32)
5162  RC = &AArch64::FPR32RegClass;
5163  else if (RegVT == MVT::f64 || RegVT.is64BitVector())
5164  RC = &AArch64::FPR64RegClass;
5165  else if (RegVT == MVT::f128 || RegVT.is128BitVector())
5166  RC = &AArch64::FPR128RegClass;
5167  else if (RegVT.isScalableVector() &&
5168  RegVT.getVectorElementType() == MVT::i1)
5169  RC = &AArch64::PPRRegClass;
5170  else if (RegVT.isScalableVector())
5171  RC = &AArch64::ZPRRegClass;
5172  else
5173  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
5174 
5175  // Transform the arguments in physical registers into virtual ones.
5176  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
5177  ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
5178 
5179  // If this is an 8, 16 or 32-bit value, it is really passed promoted
5180  // to 64 bits. Insert an assert[sz]ext to capture this, then
5181  // truncate to the right size.
5182  switch (VA.getLocInfo()) {
5183  default:
5184  llvm_unreachable("Unknown loc info!");
5185  case CCValAssign::Full:
5186  break;
5187  case CCValAssign::Indirect:
5188  assert(VA.getValVT().isScalableVector() &&
5189  "Only scalable vectors can be passed indirectly");
5190  break;
5191  case CCValAssign::BCvt:
5192  ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
5193  break;
5194  case CCValAssign::AExt:
5195  case CCValAssign::SExt:
5196  case CCValAssign::ZExt:
5197  break;
5198  case CCValAssign::AExtUpper:
5199  ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
5200  DAG.getConstant(32, DL, RegVT));
5201  ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
5202  break;
5203  }
5204  } else { // VA.isRegLoc()
5205  assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
5206  unsigned ArgOffset = VA.getLocMemOffset();
5207  unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
5208  ? VA.getLocVT().getSizeInBits()
5209  : VA.getValVT().getSizeInBits()) / 8;
5210 
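  // On big-endian targets an argument smaller than 8 bytes occupies the high
  // part of its slot, so bias the load offset towards the end of the slot.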
5211  uint32_t BEAlign = 0;
5212  if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
5213  !Ins[i].Flags.isInConsecutiveRegs())
5214  BEAlign = 8 - ArgSize;
5215 
5216  int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
5217 
5218  // Create load nodes to retrieve arguments from the stack.
5219  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5220 
5221  // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
5222  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
5223  MVT MemVT = VA.getValVT();
5224 
5225  switch (VA.getLocInfo()) {
5226  default:
5227  break;
5228  case CCValAssign::Trunc:
5229  case CCValAssign::BCvt:
5230  MemVT = VA.getLocVT();
5231  break;
5232  case CCValAssign::Indirect:
5233  assert(VA.getValVT().isScalableVector() &&
5234  "Only scalable vectors can be passed indirectly");
5235  MemVT = VA.getLocVT();
5236  break;
5237  case CCValAssign::SExt:
5238  ExtType = ISD::SEXTLOAD;
5239  break;
5240  case CCValAssign::ZExt:
5241  ExtType = ISD::ZEXTLOAD;
5242  break;
5243  case CCValAssign::AExt:
5244  ExtType = ISD::EXTLOAD;
5245  break;
5246  }
5247 
5248  ArgValue = DAG.getExtLoad(
5249  ExtType, DL, VA.getLocVT(), Chain, FIN,
5250  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
5251  MemVT);
5252  }
5253 
5254  if (VA.getLocInfo() == CCValAssign::Indirect) {
5255  assert(VA.getValVT().isScalableVector() &&
5256  "Only scalable vectors can be passed indirectly");
5257 
5258  uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
5259  unsigned NumParts = 1;
5260  if (Ins[i].Flags.isInConsecutiveRegs()) {
5261  assert(!Ins[i].Flags.isInConsecutiveRegsLast());
5262  while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5263  ++NumParts;
5264  }
5265 
5266  MVT PartLoad = VA.getValVT();
5267  SDValue Ptr = ArgValue;
5268 
5269  // Ensure we generate all loads for each tuple part, whilst updating the
5270  // pointer after each load correctly using vscale.
5271  while (NumParts > 0) {
5272  ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
5273  InVals.push_back(ArgValue);
5274  NumParts--;
5275  if (NumParts > 0) {
5276  SDValue BytesIncrement = DAG.getVScale(
5277  DL, Ptr.getValueType(),
5278  APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
5279  SDNodeFlags Flags;
5280  Flags.setNoUnsignedWrap(true);
5281  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5282  BytesIncrement, Flags);
5283  ExtraArgLocs++;
5284  i++;
5285  }
5286  }
5287  } else {
5288  if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
5289  ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
5290  ArgValue, DAG.getValueType(MVT::i32));
5291  InVals.push_back(ArgValue);
5292  }
5293  }
5294  assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
5295 
5296  // varargs
5298  if (isVarArg) {
5299  if (!Subtarget->isTargetDarwin() || IsWin64) {
5300  // The AAPCS variadic function ABI is identical to the non-variadic
5301  // one. As a result there may be more arguments in registers and we should
5302  // save them for future reference.
5303  // Win64 variadic functions also pass arguments in registers, but all float
5304  // arguments are passed in integer registers.
5305  saveVarArgRegisters(CCInfo, DAG, DL, Chain);
5306  }
5307 
5308  // This will point to the next argument passed via stack.
5309  unsigned StackOffset = CCInfo.getNextStackOffset();
5310  // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
5311  StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
5312  FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
5313 
5314  if (MFI.hasMustTailInVarArgFunc()) {
5315  SmallVector<MVT, 2> RegParmTypes;
5316  RegParmTypes.push_back(MVT::i64);
5317  RegParmTypes.push_back(MVT::f128);
5318  // Compute the set of forwarded registers. The rest are scratch.
5319  SmallVectorImpl<ForwardedRegister> &Forwards =
5320  FuncInfo->getForwardedMustTailRegParms();
5321  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
5322  CC_AArch64_AAPCS);
5323 
5324  // Conservatively forward X8, since it might be used for aggregate return.
5325  if (!CCInfo.isAllocated(AArch64::X8)) {
5326  unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
5327  Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
5328  }
5329  }
5330  }
5331 
5332  // On Windows, InReg pointers must be returned, so record the pointer in a
5333  // virtual register at the start of the function so it can be returned in the
5334  // epilogue.
5335  if (IsWin64) {
5336  for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
5337  if (Ins[I].Flags.isInReg()) {
5338  assert(!FuncInfo->getSRetReturnReg());
5339 
5340  MVT PtrTy = getPointerTy(DAG.getDataLayout());
5341  Register Reg =
5342  MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
5343  FuncInfo->setSRetReturnReg(Reg);
5344 
5345  SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
5346  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
5347  break;
5348  }
5349  }
5350  }
5351 
5352  unsigned StackArgSize = CCInfo.getNextStackOffset();
5353  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5354  if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
5355  // This is a non-standard ABI so by fiat I say we're allowed to make full
5356  // use of the stack area to be popped, which must be aligned to 16 bytes in
5357  // any case:
5358  StackArgSize = alignTo(StackArgSize, 16);
5359 
5360  // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
5361  // a multiple of 16.
5362  FuncInfo->setArgumentStackToRestore(StackArgSize);
5363 
5364  // This realignment carries over to the available bytes below. Our own
5365  // callers will guarantee the space is free by giving an aligned value to
5366  // CALLSEQ_START.
5367  }
5368  // Even if we're not expected to free up the space, it's useful to know how
5369  // much is there while considering tail calls (because we can reuse it).
5370  FuncInfo->setBytesInStackArgArea(StackArgSize);
5371 
5372  if (Subtarget->hasCustomCallingConv())
5373  Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
5374 
5375  return Chain;
5376 }
5377 
5378 void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
5379  SelectionDAG &DAG,
5380  const SDLoc &DL,
5381  SDValue &Chain) const {
5382  MachineFunction &MF = DAG.getMachineFunction();
5383  MachineFrameInfo &MFI = MF.getFrameInfo();
5384  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5385  auto PtrVT = getPointerTy(DAG.getDataLayout());
5386  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5387 
5388  SmallVector<SDValue, 8> MemOps;
5389 
5390  static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
5391  AArch64::X3, AArch64::X4, AArch64::X5,
5392  AArch64::X6, AArch64::X7 };
5393  static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
5394  unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
5395 
5396  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
5397  int GPRIdx = 0;
5398  if (GPRSaveSize != 0) {
5399  if (IsWin64) {
5400  GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
5401  if (GPRSaveSize & 15)
5402  // The extra size here, if triggered, will always be 8.
5403  MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
5404  } else
5405  GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
5406 
5407  SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
5408 
5409  for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
5410  unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
5411  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
5412  SDValue Store = DAG.getStore(
5413  Val.getValue(1), DL, Val, FIN,
5414  IsWin64
5415  ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
5416  GPRIdx,
5417  (i - FirstVariadicGPR) * 8)
5418  : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
5419  MemOps.push_back(Store);
5420  FIN =
5421  DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
5422  }
5423  }
5424  FuncInfo->setVarArgsGPRIndex(GPRIdx);
5425  FuncInfo->setVarArgsGPRSize(GPRSaveSize);
5426 
5427  if (Subtarget->hasFPARMv8() && !IsWin64) {
5428  static const MCPhysReg FPRArgRegs[] = {
5429  AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
5430  AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
5431  static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
5432  unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
5433 
5434  unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
5435  int FPRIdx = 0;
5436  if (FPRSaveSize != 0) {
5437  FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
5438 
5439  SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
5440 
5441  for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
5442  unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
5443  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
5444 
5445  SDValue Store = DAG.getStore(
5446  Val.getValue(1), DL, Val, FIN,
5447  MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
5448  MemOps.push_back(Store);
5449  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
5450  DAG.getConstant(16, DL, PtrVT));
5451  }
5452  }
5453  FuncInfo->setVarArgsFPRIndex(FPRIdx);
5454  FuncInfo->setVarArgsFPRSize(FPRSaveSize);
5455  }
5456 
5457  if (!MemOps.empty()) {
5458  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5459  }
5460 }
5461 
5462 /// LowerCallResult - Lower the result values of a call into the
5463 /// appropriate copies out of appropriate physical registers.
5464 SDValue AArch64TargetLowering::LowerCallResult(
5465  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5466  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5467  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
5468  SDValue ThisVal) const {
5469  CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5470  // Assign locations to each value returned by this call.
5471  SmallVector<CCValAssign, 16> RVLocs;
5472  DenseMap<unsigned, SDValue> CopiedRegs;
5473  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5474  *DAG.getContext());
5475  CCInfo.AnalyzeCallResult(Ins, RetCC);
5476 
5477  // Copy all of the result registers out of their specified physreg.
5478  for (unsigned i = 0; i != RVLocs.size(); ++i) {
5479  CCValAssign VA = RVLocs[i];
5480 
5481  // Pass 'this' value directly from the argument to return value, to avoid
5482  // reg unit interference
5483  if (i == 0 && isThisReturn) {
5484  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
5485  "unexpected return calling convention register assignment");
5486  InVals.push_back(ThisVal);
5487  continue;
5488  }
5489 
5490  // Avoid copying a physreg twice since RegAllocFast is incompetent and only
5491  // allows one use of a physreg per block.
5492  SDValue Val = CopiedRegs.lookup(VA.getLocReg());
5493  if (!Val) {
5494  Val =
5495  DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
5496  Chain = Val.getValue(1);
5497  InFlag = Val.getValue(2);
5498  CopiedRegs[VA.getLocReg()] = Val;
5499  }
5500 
5501  switch (VA.getLocInfo()) {
5502  default:
5503  llvm_unreachable("Unknown loc info!");
5504  case CCValAssign::Full:
5505  break;
5506  case CCValAssign::BCvt:
5507  Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5508  break;
5509  case CCValAssign::AExtUpper:
5510  Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
5511  DAG.getConstant(32, DL, VA.getLocVT()));
5512  LLVM_FALLTHROUGH;
5513  case CCValAssign::AExt:
5514  LLVM_FALLTHROUGH;
5515  case CCValAssign::ZExt:
5516  Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
5517  break;
5518  }
5519 
5520  InVals.push_back(Val);
5521  }
5522 
5523  return Chain;
5524 }
5525 
5526 /// Return true if the calling convention is one that we can guarantee TCO for.
5527 static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
5528  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
5529  CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
5530  }
5531 
5532 /// Return true if we might ever do TCO for calls with this calling convention.
5533  static bool mayTailCallThisCC(CallingConv::ID CC) {
5534  switch (CC) {
5535  case CallingConv::C:
5536  case CallingConv::AArch64_SVE_VectorCall:
5537  case CallingConv::PreserveMost:
5538  case CallingConv::Swift:
5539  case CallingConv::SwiftTail:
5540  case CallingConv::Tail:
5541  case CallingConv::Fast:
5542  return true;
5543  default:
5544  return false;
5545  }
5546 }
5547 
5548 bool AArch64TargetLowering::isEligibleForTailCallOptimization(
5549  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
5550  const SmallVectorImpl<ISD::OutputArg> &Outs,
5551  const SmallVectorImpl<SDValue> &OutVals,
5552  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5553  if (!mayTailCallThisCC(CalleeCC))
5554  return false;
5555 
5556  MachineFunction &MF = DAG.getMachineFunction();
5557  const Function &CallerF = MF.getFunction();
5558  CallingConv::ID CallerCC = CallerF.getCallingConv();
5559 
5560  // Functions using the C or Fast calling convention that have an SVE signature
5561  // preserve more registers and should assume the SVE_VectorCall CC.
5562  // The check for matching callee-saved regs will determine whether it is
5563  // eligible for TCO.
5564  if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
5565  AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
5566  CallerCC = CallingConv::AArch64_SVE_VectorCall;
5567 
5568  bool CCMatch = CallerCC == CalleeCC;
5569 
5570  // When using the Windows calling convention on a non-windows OS, we want
5571  // to back up and restore X18 in such functions; we can't do a tail call
5572  // from those functions.
5573  if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
5574  CalleeCC != CallingConv::Win64)
5575  return false;
5576 
5577  // Byval parameters hand the function a pointer directly into the stack area
5578  // we want to reuse during a tail call. Working around this *is* possible (see
5579  // X86) but less efficient and uglier in LowerCall.
5580  for (Function::const_arg_iterator i = CallerF.arg_begin(),
5581  e = CallerF.arg_end();
5582  i != e; ++i) {
5583  if (i->hasByValAttr())
5584  return false;
5585 
5586  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
5587  // In this case, it is necessary to save/restore X0 in the callee. Tail
5588  // call opt interferes with this. So we disable tail call opt when the
5589  // caller has an argument with "inreg" attribute.
5590 
5591  // FIXME: Check whether the callee also has an "inreg" argument.
5592  if (i->hasInRegAttr())
5593  return false;
5594  }
5595 
5596  if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
5597  return CCMatch;
5598 
5599  // Externally-defined functions with weak linkage should not be
5600  // tail-called on AArch64 when the OS does not support dynamic
5601  // pre-emption of symbols, as the AAELF spec requires normal calls
5602  // to undefined weak functions to be replaced with a NOP or jump to the
5603  // next instruction. The behaviour of branch instructions in this
5604  // situation (as used for tail calls) is implementation-defined, so we
5605  // cannot rely on the linker replacing the tail call with a return.
5606  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5607  const GlobalValue *GV = G->getGlobal();
5608  const Triple &TT = getTargetMachine().getTargetTriple();
5609  if (GV->hasExternalWeakLinkage() &&
5610  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
5611  return false;
5612  }
5613 
5614  // Now we search for cases where we can use a tail call without changing the
5615  // ABI. Sibcall is used in some places (particularly gcc) to refer to this
5616  // concept.
5617 
5618  // I want anyone implementing a new calling convention to think long and hard
5619  // about this assert.
5620  assert((!isVarArg || CalleeCC == CallingConv::C) &&
5621  "Unexpected variadic calling convention");
5622 
5623  LLVMContext &C = *DAG.getContext();
5624  if (isVarArg && !Outs.empty()) {
5625  // At least two cases here: if caller is fastcc then we can't have any
5626  // memory arguments (we'd be expected to clean up the stack afterwards). If
5627  // caller is C then we could potentially use its argument area.
5628 
5629  // FIXME: for now we take the most conservative of these in both cases:
5630  // disallow all variadic memory operands.
5631  SmallVector<CCValAssign, 16> ArgLocs;
5632  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5633 
5634  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
5635  for (const CCValAssign &ArgLoc : ArgLocs)
5636  if (!ArgLoc.isRegLoc())
5637  return false;
5638  }
5639 
5640  // Check that the call results are passed in the same way.
5641  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5642  CCAssignFnForCall(CalleeCC, isVarArg),
5643  CCAssignFnForCall(CallerCC, isVarArg)))
5644  return false;
5645  // The callee has to preserve all registers the caller needs to preserve.
5646  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5647  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5648  if (!CCMatch) {
5649  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5650  if (Subtarget->hasCustomCallingConv()) {
5651  TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
5652  TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
5653  }
5654  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5655  return false;
5656  }
5657 
5658  // Nothing more to check if the callee is taking no arguments
5659  if (Outs.empty())
5660  return true;
5661 
5662  SmallVector<CCValAssign, 16> ArgLocs;
5663  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5664 
5665  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
5666 
5667  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5668 
5669  // If any of the arguments is passed indirectly, it must be SVE, so the
5670  // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
5671  // allocate space on the stack. That is why we determine this explicitly
5672  // here: if any argument is passed indirectly, the call cannot be a tail
5673  // call.
5673  if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
5674  assert((A.getLocInfo() != CCValAssign::Indirect ||
5675  A.getValVT().isScalableVector()) &&
5676  "Expected value to be scalable");
5677  return A.getLocInfo() == CCValAssign::Indirect;
5678  }))
5679  return false;
5680 
5681  // If the stack arguments for this call do not fit into our own save area then
5682  // the call cannot be made tail.
5683  if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
5684  return false;
5685 
5686  const MachineRegisterInfo &MRI = MF.getRegInfo();
5687  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5688  return false;
5689 
5690  return true;
5691 }
5692 
5693 SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
5694  SelectionDAG &DAG,
5695  MachineFrameInfo &MFI,
5696  int ClobberedFI) const {
5697  SmallVector<SDValue, 8> ArgChains;
5698  int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
5699  int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
5700 
5701  // Include the original chain at the beginning of the list. When this is
5702  // used by target LowerCall hooks, this helps legalize find the
5703  // CALLSEQ_BEGIN node.
5704  ArgChains.push_back(Chain);
5705 
5706  // Add a chain value for each stack-argument load overlapping the slot.
5707  for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
5708  UE = DAG.getEntryNode().getNode()->use_end();
5709  U != UE; ++U)
5710  if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
5711  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
5712  if (FI->getIndex() < 0) {
5713  int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
5714  int64_t InLastByte = InFirstByte;
5715  InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
5716 
5717  if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
5718  (FirstByte <= InFirstByte && InFirstByte <= LastByte))
5719  ArgChains.push_back(SDValue(L, 1));
5720  }
5721 
5722  // Build a tokenfactor for all the chains.
5723  return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
5724 }
5725 
5726 bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
5727  bool TailCallOpt) const {
5728  return (CallCC == CallingConv::Fast && TailCallOpt) ||
5729  CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
5730 }
5731 
5732 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
5733 /// and add input and output parameter nodes.
5734 SDValue
5735 AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
5736  SmallVectorImpl<SDValue> &InVals) const {
5737  SelectionDAG &DAG = CLI.DAG;
5738  SDLoc &DL = CLI.DL;
5739  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
5740  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
5741  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
5742  SDValue Chain = CLI.Chain;
5743  SDValue Callee = CLI.Callee;
5744  bool &IsTailCall = CLI.IsTailCall;
5745  CallingConv::ID CallConv = CLI.CallConv;
5746  bool IsVarArg = CLI.IsVarArg;
5747 
5748  MachineFunction &MF = DAG.getMachineFunction();
5749  MachineFunction::CallSiteInfo CSInfo;
5750  bool IsThisReturn = false;
5751 
5752  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5753  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5754  bool IsSibCall = false;
5755  bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
5756 
5757  // Check callee args/returns for SVE registers and set calling convention
5758  // accordingly.
5759  if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
5760  bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
5761  return Out.VT.isScalableVector();
5762  });
5763  bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
5764  return In.VT.isScalableVector();
5765  });
5766 
5767  if (CalleeInSVE || CalleeOutSVE)
5768  CallConv = CallingConv::AArch64_SVE_VectorCall;
5769  }
5770 
5771  if (IsTailCall) {
5772  // Check if it's really possible to do a tail call.
5773  IsTailCall = isEligibleForTailCallOptimization(
5774  Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
5775 
5776  // A sibling call is one where we're under the usual C ABI and not planning
5777  // to change that but can still do a tail call:
5778  if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
5779  CallConv != CallingConv::SwiftTail)
5780  IsSibCall = true;
5781 
5782  if (IsTailCall)
5783  ++NumTailCalls;
5784  }
5785 
5786  if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
5787  report_fatal_error("failed to perform tail call elimination on a call "
5788  "site marked musttail");
5789 
5790  // Analyze operands of the call, assigning locations to each operand.
5791  SmallVector<CCValAssign, 16> ArgLocs;
5792  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
5793  *DAG.getContext());
5794 
5795  if (IsVarArg) {
5796  // Handle fixed and variable vector arguments differently.
5797  // Variable vector arguments always go into memory.
5798  unsigned NumArgs = Outs.size();
5799 
5800  for (unsigned i = 0; i != NumArgs; ++i) {
5801  MVT ArgVT = Outs[i].VT;
5802  if (!Outs[i].IsFixed && ArgVT.isScalableVector())
5803  report_fatal_error("Passing SVE types to variadic functions is "
5804  "currently not supported");
5805 
5806  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5807  bool UseVarArgCC = !Outs[i].IsFixed;
5808  // On Windows, the fixed arguments in a vararg call are passed in GPRs
5809  // too, so use the vararg CC to force them to integer registers.
5810  if (IsCalleeWin64)
5811  UseVarArgCC = true;
5812  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5813  bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
5814  assert(!Res && "Call operand has unhandled type");
5815  (void)Res;
5816  }
5817  } else {
5818  // At this point, Outs[].VT may already be promoted to i32. To correctly
5819  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5820  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5821  // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
5822  // we use a special version of AnalyzeCallOperands to pass in ValVT and
5823  // LocVT.
5824  unsigned NumArgs = Outs.size();
5825  for (unsigned i = 0; i != NumArgs; ++i) {
5826  MVT ValVT = Outs[i].VT;
5827  // Get type of the original argument.
5828  EVT ActualVT = getValueType(DAG.getDataLayout(),
5829  CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
5830  /*AllowUnknown*/ true);
5831  MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
5832  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5833  // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5834  if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5835  ValVT = MVT::i8;
5836  else if (ActualMVT == MVT::i16)
5837  ValVT = MVT::i16;
5838 
5839  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
5840  bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
5841  assert(!Res && "Call operand has unhandled type");
5842  (void)Res;
5843  }
5844  }
5845 
5846  // Get a count of how many bytes are to be pushed on the stack.
5847  unsigned NumBytes = CCInfo.getNextStackOffset();
5848 
5849  if (IsSibCall) {
5850  // Since we're not changing the ABI to make this a tail call, the memory
5851  // operands are already available in the caller's incoming argument space.
5852  NumBytes = 0;
5853  }
5854 
5855  // FPDiff is the byte offset of the call's argument area from the callee's.
5856  // Stores to callee stack arguments will be placed in FixedStackSlots offset
5857  // by this amount for a tail call. In a sibling call it must be 0 because the
5858  // caller will deallocate the entire stack and the callee still expects its
5859  // arguments to begin at SP+0. Completely unused for non-tail calls.
5860  int FPDiff = 0;
5861 
5862  if (IsTailCall && !IsSibCall) {
5863  unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
5864 
5865  // Since callee will pop argument stack as a tail call, we must keep the
5866  // popped size 16-byte aligned.
5867  NumBytes = alignTo(NumBytes, 16);
5868 
5869  // FPDiff will be negative if this tail call requires more space than we
5870  // would automatically have in our incoming argument space. Positive if we
5871  // can actually shrink the stack.
5872  FPDiff = NumReusableBytes - NumBytes;
5873 
5874  // Update the required reserved area if this is the tail call requiring the
5875  // most argument stack space.
5876  if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
5877  FuncInfo->setTailCallReservedStack(-FPDiff);
5878 
5879  // The stack pointer must be 16-byte aligned at all times it's used for a
5880  // memory operation, which in practice means at *all* times and in
5881  // particular across call boundaries. Therefore our own arguments started at
5882  // a 16-byte aligned SP and the delta applied for the tail call should
5883  // satisfy the same constraint.
5884  assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
5885  }
5886 
5887  // Adjust the stack pointer for the new arguments...
5888  // These operations are automatically eliminated by the prolog/epilog pass
5889  if (!IsSibCall)
5890  Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
5891 
5892  SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
5893  getPointerTy(DAG.getDataLayout()));
5894 
5895  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5896  SmallSet<unsigned, 8> RegsUsed;
5897  SmallVector<SDValue, 8> MemOpChains;
5898  auto PtrVT = getPointerTy(DAG.getDataLayout());
5899 
5900  if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
5901  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
5902  for (const auto &F : Forwards) {
5903  SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
5904  RegsToPass.emplace_back(F.PReg, Val);
5905  }
5906  }
5907 
5908  // Walk the register/memloc assignments, inserting copies/loads.
5909  unsigned ExtraArgLocs = 0;
5910  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5911  CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5912  SDValue Arg = OutVals[i];
5913  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5914 
5915  // Promote the value if needed.
5916  switch (VA.getLocInfo()) {
5917  default:
5918  llvm_unreachable("Unknown loc info!");
5919  case CCValAssign::Full:
5920  break;
5921  case CCValAssign::SExt:
5922  Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
5923  break;
5924  case CCValAssign::ZExt:
5925  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
5926  break;
5927  case CCValAssign::AExt:
5928  if (Outs[i].ArgVT == MVT::i1) {
5929  // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
5930  Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
5931  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
5932  }
5933  Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5934  break;
5935  case CCValAssign::AExtUpper:
5936  assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5937  Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5938  Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
5939  DAG.getConstant(32, DL, VA.getLocVT()));
5940  break;
5941  case CCValAssign::BCvt:
5942  Arg = DAG.getBitcast(VA.getLocVT(), Arg);
5943  break;
5944  case CCValAssign::Trunc:
5945  Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5946  break;
5947  case CCValAssign::FPExt:
5948  Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
5949  break;
5950  case CCValAssign::Indirect:
5951  assert(VA.getValVT().isScalableVector() &&
5952  "Only scalable vectors can be passed indirectly");
5953 
5954  uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
5955  uint64_t PartSize = StoreSize;
5956  unsigned NumParts = 1;
5957  if (Outs[i].Flags.isInConsecutiveRegs()) {
5958  assert(!Outs[i].Flags.isInConsecutiveRegsLast());
5959  while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5960  ++NumParts;
5961  StoreSize *= NumParts;
5962  }
5963 
5964  MachineFrameInfo &MFI = MF.getFrameInfo();
5965  Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
5966  Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
5967  int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
5968  MFI.setStackID(FI, TargetStackID::ScalableVector);
5969 
5970  MachinePointerInfo MPI =
5971  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
5972  SDValue Ptr = DAG.getFrameIndex(
5973  FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
5974  SDValue SpillSlot = Ptr;
5975 
5976  // Ensure we generate all stores for each tuple part, whilst updating the
5977  // pointer after each store correctly using vscale.
5978  while (NumParts) {
5979  Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
5980  NumParts--;
5981  if (NumParts > 0) {
5982  SDValue BytesIncrement = DAG.getVScale(
5983  DL, Ptr.getValueType(),
5984  APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
5985  SDNodeFlags Flags;
5986  Flags.setNoUnsignedWrap(true);
5987 
5988  MPI = MachinePointerInfo(MPI.getAddrSpace());
5989  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5990  BytesIncrement, Flags);
5991  ExtraArgLocs++;
5992  i++;
5993  }
5994  }
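// [Editorial sketch, not from the source] For a two-part scalable tuple the
// loop above effectively emits:
//   store %part0, [FI]
//   store %part1, [FI + vscale * PartSize]
// i.e. each subsequent part is stored at a vscale-scaled byte offset, since
// the concrete register size is unknown at compile time.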
5995 
5996  Arg = SpillSlot;
5997  break;
5998  }
5999 
6000  if (VA.isRegLoc()) {
6001  if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
6002  Outs[0].VT == MVT::i64) {
6003  assert(VA.getLocVT() == MVT::i64 &&
6004  "unexpected calling convention register assignment");
6005  assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
6006  "unexpected use of 'returned'");
6007  IsThisReturn = true;
6008  }
6009  if (RegsUsed.count(VA.getLocReg())) {
6010  // If this register has already been used then we're trying to pack
6011  // parts of an [N x i32] into an X-register. The extension type will
6012  // take care of putting the two halves in the right place but we have to
6013  // combine them.
6014  SDValue &Bits =
6015  llvm::find_if(RegsToPass,
6016  [=](const std::pair<unsigned, SDValue> &Elt) {
6017  return Elt.first == VA.getLocReg();
6018  })
6019  ->second;
6020  Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
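// [Editorial example, not from the source] For [2 x i32] packed into one
// X-register: the first half occupies bits 0-31, the AExtUpper handling
// above shifted the second half into bits 63-32, and this OR merges the two
// halves into the final register value.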
6021  // Call site info is used for the function's parameter entry value
6022  // tracking. For now we track only simple cases where the parameter
6023  // is transferred through a whole register.
6024  llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
6025  return ArgReg.Reg == VA.getLocReg();
6026  });
6027  } else {
6028  RegsToPass.emplace_back(VA.getLocReg(), Arg);
6029  RegsUsed.insert(VA.getLocReg());
6030  const TargetOptions &Options = DAG.getTarget().Options;
6031  if (Options.EmitCallSiteInfo)
6032  CSInfo.emplace_back(VA.getLocReg(), i);
6033  }
6034  } else {
6035  assert(VA.isMemLoc());
6036 
6037  SDValue DstAddr;
6038  MachinePointerInfo DstInfo;
6039 
6040  // FIXME: This works on big-endian for composite byvals, which are the
6041  // common case. It should also work for fundamental types too.
6042  uint32_t BEAlign = 0;
6043  unsigned OpSize;
6044  if (VA.getLocInfo() == CCValAssign::Indirect ||
6045  VA.getLocInfo() == CCValAssign::Trunc)
6046  OpSize = VA.getLocVT().getFixedSizeInBits();
6047  else
6048  OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
6049  : VA.getValVT().getSizeInBits();
6050  OpSize = (OpSize + 7) / 8;
6051  if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
6052  !Flags.isInConsecutiveRegs()) {
6053  if (OpSize < 8)
6054  BEAlign = 8 - OpSize;
6055  }
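// [Editorial example, not from the source] Big-endian stores put a
// register's significant bytes at lower addresses, so a 4-byte value written
// through an 8-byte slot belongs in the slot's high-address half:
// OpSize = 4 gives BEAlign = 8 - 4 = 4, nudging the store to offset + 4,
// where the callee expects to read it.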
6056  unsigned LocMemOffset = VA.getLocMemOffset();
6057  int32_t Offset = LocMemOffset + BEAlign;
6058  SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6059  PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6060 
6061  if (IsTailCall) {
6062  Offset = Offset + FPDiff;
6063  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
6064 
6065  DstAddr = DAG.getFrameIndex(FI, PtrVT);
6066  DstInfo =
6067  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
6068 
6069  // Make sure any stack arguments overlapping with where we're storing
6070  // are loaded before this eventual operation. Otherwise they'll be
6071  // clobbered.
6072  Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
6073  } else {
6074  SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6075 
6076  DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6077  DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
6078  LocMemOffset);
6079  }
6080 
6081  if (Outs[i].Flags.isByVal()) {
6082  SDValue SizeNode =
6083  DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
6084  SDValue Cpy = DAG.getMemcpy(
6085  Chain, DL, DstAddr, Arg, SizeNode,
6086  Outs[i].Flags.getNonZeroByValAlign(),
6087  /*isVol = */ false, /*AlwaysInline = */ false,
6088  /*isTailCall = */ false, DstInfo, MachinePointerInfo());
6089 
6090  MemOpChains.push_back(Cpy);
6091  } else {
6092  // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
6093  // promoted to a legal register type i32, we should truncate Arg back to
6094  // i1/i8/i16.
6095  if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
6096  VA.getValVT() == MVT::i16)
6097  Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
6098 
6099  SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
6100  MemOpChains.push_back(Store);
6101  }
6102  }
6103  }
6104 
6105  if (!MemOpChains.empty())
6106  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
6107 
6108  // Build a sequence of copy-to-reg nodes chained together with token chain
6109  // and flag operands which copy the outgoing args into the appropriate regs.
6110  SDValue InFlag;
6111  for (auto &RegToPass : RegsToPass) {
6112  Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
6113  RegToPass.second, InFlag);
6114  InFlag = Chain.getValue(1);
6115  }
6116 
6117  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
6118  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
6119  // node so that legalize doesn't hack it.
6120  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
6121  auto GV = G->getGlobal();
6122  unsigned OpFlags =
6123  Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
6124  if (OpFlags & AArch64II::MO_GOT) {
6125  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
6126  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
6127  } else {
6128  const GlobalValue *GV = G->getGlobal();
6129  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
6130  }
6131  } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
6132  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6133  Subtarget->isTargetMachO()) {
6134  const char *Sym = S->getSymbol();
6135  Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
6136  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
6137  } else {
6138  const char *Sym = S->getSymbol();
6139  Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
6140  }
6141  }
6142 
6143  // We don't usually want to end the call-sequence here because we would tidy
6144  // the frame up *after* the call; however, in the ABI-changing tail-call case
6145  // we've carefully laid out the parameters so that when sp is reset they'll be
6146  // in the correct location.
6147  if (IsTailCall && !IsSibCall) {
6148  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
6149  DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
6150  InFlag = Chain.getValue(1);
6151  }
6152 
6153  std::vector<SDValue> Ops;
6154  Ops.push_back(Chain);
6155  Ops.push_back(Callee);
6156 
6157  if (IsTailCall) {
6158  // Each tail call may have to adjust the stack by a different amount, so
6159  // this information must travel along with the operation for eventual
6160  // consumption by emitEpilogue.
6161  Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
6162  }
6163 
6164  // Add argument registers to the end of the list so that they are known live
6165  // into the call.
6166  for (auto &RegToPass : RegsToPass)
6167  Ops.push_back(DAG.getRegister(RegToPass.first,
6168  RegToPass.second.getValueType()));
6169 
6170  // Add a register mask operand representing the call-preserved registers.
6171  const uint32_t *Mask;
6172  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6173  if (IsThisReturn) {
6174  // For 'this' returns, use the X0-preserving mask if applicable
6175  Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
6176  if (!Mask) {
6177  IsThisReturn = false;
6178  Mask = TRI->getCallPreservedMask(MF, CallConv);
6179  }
6180  } else
6181  Mask = TRI->getCallPreservedMask(MF, CallConv);
6182 
6183  if (Subtarget->hasCustomCallingConv())
6184  TRI->UpdateCustomCallPreservedMask(MF, &Mask);
6185 
6186  if (TRI->isAnyArgRegReserved(MF))
6187  TRI->emitReservedArgRegCallError(MF);
6188 
6189  assert(Mask && "Missing call preserved mask for calling convention");
6190  Ops.push_back(DAG.getRegisterMask(Mask));
6191 
6192  if (InFlag.getNode())
6193  Ops.push_back(InFlag);
6194 
6195  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6196 
6197  // If we're doing a tail call, use a TC_RETURN here rather than an
6198  // actual call instruction.
6199  if (IsTailCall) {
6200  MF.getFrameInfo().setHasTailCall();
6201  SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
6202  DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
6203  return Ret;
6204  }
6205 
6206  unsigned CallOpc = AArch64ISD::CALL;
6207  // Calls with operand bundle "clang.arc.attachedcall" are special. They should
6208  // be expanded to the call, directly followed by a special marker sequence.
6209  // Use the CALL_RVMARKER to do that.
6210  if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
6211  assert(!IsTailCall &&
6212  "tail calls cannot be marked with clang.arc.attachedcall");
6213  CallOpc = AArch64ISD::CALL_RVMARKER;
6214  }
6215 
6216  // Returns a chain and a flag for retval copy to use.
6217  Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
6218  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
6219  InFlag = Chain.getValue(1);
6220  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
6221 
6222  uint64_t CalleePopBytes =
6223  DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
6224 
6225  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
6226  DAG.getIntPtrConstant(CalleePopBytes, DL, true),
6227  InFlag, DL);
6228  if (!Ins.empty())
6229  InFlag = Chain.getValue(1);
6230 
6231  // Handle result values, copying them out of physregs into vregs that we
6232  // return.
6233  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
6234  InVals, IsThisReturn,
6235  IsThisReturn ? OutVals[0] : SDValue());
6236 }
6237 
6238 bool AArch64TargetLowering::CanLowerReturn(
6239  CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
6240  const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
6241  CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
6242  SmallVector<CCValAssign, 16> RVLocs;
6243  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6244  return CCInfo.CheckReturn(Outs, RetCC);
6245 }
6246 
6247 SDValue
6248 AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6249  bool isVarArg,
6250  const SmallVectorImpl<ISD::OutputArg> &Outs,
6251  const SmallVectorImpl<SDValue> &OutVals,
6252  const SDLoc &DL, SelectionDAG &DAG) const {
6253  auto &MF = DAG.getMachineFunction();
6254  auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
6255 
6256  CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
6257  SmallVector<CCValAssign, 16> RVLocs;
6258  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6259  *DAG.getContext());
6260  CCInfo.AnalyzeReturn(Outs, RetCC);
6261 
6262  // Copy the result values into the output registers.
6263  SDValue Flag;
6264  SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
6265  SmallSet<unsigned, 4> RegsUsed;
6266  for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
6267  ++i, ++realRVLocIdx) {
6268  CCValAssign &VA = RVLocs[i];
6269  assert(VA.isRegLoc() && "Can only return in registers!");
6270  SDValue Arg = OutVals[realRVLocIdx];
6271 
6272  switch (VA.getLocInfo()) {
6273  default:
6274  llvm_unreachable("Unknown loc info!");
6275  case CCValAssign::Full:
6276  if (Outs[i].ArgVT == MVT::i1) {
6277  // AAPCS requires i1 to be zero-extended to i8 by the producer of the
6278  // value. This is strictly redundant on Darwin (which uses "zeroext
6279  // i1"), but will be optimised out before ISel.
6280  Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
6281  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
6282  }
6283  break;
6284  case CCValAssign::BCvt:
6285  Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
6286  break;
6287  case CCValAssign::AExt:
6288  case CCValAssign::ZExt:
6289  Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
6290  break;
6291  case CCValAssign::AExtUpper:
6292  assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
6293  Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
6294  Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
6295  DAG.getConstant(32, DL, VA.getLocVT()));
6296  break;
6297  }
6298 
6299  if (RegsUsed.count(VA.getLocReg())) {
6300  SDValue &Bits =
6301  llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
6302  return Elt.first == VA.getLocReg();
6303  })->second;
6304  Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
6305  } else {
6306  RetVals.emplace_back(VA.getLocReg(), Arg);
6307  RegsUsed.insert(VA.getLocReg());
6308  }
6309  }
6310 
6311  SmallVector<SDValue, 4> RetOps(1, Chain);
6312  for (auto &RetVal : RetVals) {
6313  Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
6314  Flag = Chain.getValue(1);
6315  RetOps.push_back(
6316  DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
6317  }
6318 
6319  // Windows AArch64 ABIs require that for returning structs by value we copy
6320  // the sret argument into X0 for the return.
6321  // We saved the argument into a virtual register in the entry block,
6322  // so now we copy the value out and into X0.
6323  if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
6324  SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
6325  getPointerTy(MF.getDataLayout()));
6326 
6327  unsigned RetValReg = AArch64::X0;
6328  Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
6329  Flag = Chain.getValue(1);
6330 
6331  RetOps.push_back(
6332  DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
6333  }
6334 
6335  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6336  const MCPhysReg *I =
6337  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6338  if (I) {
6339  for (; *I; ++I) {
6340  if (AArch64::GPR64RegClass.contains(*I))
6341  RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6342  else if (AArch64::FPR64RegClass.contains(*I))
6343  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6344  else
6345  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6346  }
6347  }
6348 
6349  RetOps[0] = Chain; // Update chain.
6350 
6351  // Add the flag if we have it.
6352  if (Flag.getNode())
6353  RetOps.push_back(Flag);
6354 
6355  return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
6356 }
6357 
6358 //===----------------------------------------------------------------------===//
6359 // Other Lowering Code
6360 //===----------------------------------------------------------------------===//
6361 
6362 SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
6363  SelectionDAG &DAG,
6364  unsigned Flag) const {
6365  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
6366  N->getOffset(), Flag);
6367 }
6368 
6369 SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
6370  SelectionDAG &DAG,
6371  unsigned Flag) const {
6372  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
6373 }
6374 
6375 SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
6376  SelectionDAG &DAG,
6377  unsigned Flag) const {
6378  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
6379  N->getOffset(), Flag);
6380 }
6381 
6382 SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
6383  SelectionDAG &DAG,
6384  unsigned Flag) const {
6385  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
6386 }
6387 
6388 // (loadGOT sym)
6389 template <class NodeTy>
6390 SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
6391  unsigned Flags) const {
6392  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
6393  SDLoc DL(N);
6394  EVT Ty = getPointerTy(DAG.getDataLayout());
6395  SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
6396  // FIXME: Once remat is capable of dealing with instructions with register
6397  // operands, expand this into two nodes instead of using a wrapper node.
6398  return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
6399 }
6400 
6401 // (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
6402 template <class NodeTy>
6403 SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
6404  unsigned Flags) const {
6405  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
6406  SDLoc DL(N);
6407  EVT Ty = getPointerTy(DAG.getDataLayout());
6408  const unsigned char MO_NC = AArch64II::MO_NC;
6409  return DAG.getNode(
6410  AArch64ISD::WrapperLarge, DL, Ty,
6411  getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
6412  getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
6413  getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
6414  getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
6415 }
6416 
6417 // (addlow (adrp %hi(sym)) %lo(sym))
6418 template <class NodeTy>
6419 SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
6420  unsigned Flags) const {
6421  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
6422  SDLoc DL(N);
6423  EVT Ty = getPointerTy(DAG.getDataLayout());
6424  SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
6425  SDValue Lo = getTargetNode(N, Ty, DAG,
6426  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
6427  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
6428  return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
6429 }
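// [Editorial note, not from the source] In the default (small) code model
// getAddr() selects to the familiar two-instruction form, e.g.:
//   adrp x0, var            ; 4KiB page containing var (+/-4GiB reach)
//   add  x0, x0, :lo12:var  ; low 12 bits of the address
// matching the (addlow (adrp %hi(sym)) %lo(sym)) pattern above.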
6430 
6431 // (adr sym)
6432 template <class NodeTy>
6433 SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
6434  unsigned Flags) const {
6435  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
6436  SDLoc DL(N);
6437  EVT Ty = getPointerTy(DAG.getDataLayout());
6438  SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
6439  return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
6440 }
6441 
6442 SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
6443  SelectionDAG &DAG) const {
6444  GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
6445  const GlobalValue *GV = GN->getGlobal();
6446  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
6447 
6448  if (OpFlags != AArch64II::MO_NO_FLAG)
6449  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
6450  "unexpected offset in global node");
6451 
6452  // This also catches the large code model case for Darwin, and tiny code
6453  // model with got relocations.
6454  if ((OpFlags & AArch64II::MO_GOT) != 0) {
6455  return getGOT(GN, DAG, OpFlags);
6456  }
6457 
6458  SDValue Result;
6459  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
6460  Result = getAddrLarge(GN, DAG, OpFlags);
6461  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
6462  Result = getAddrTiny(GN, DAG, OpFlags);
6463  } else {
6464  Result = getAddr(GN, DAG, OpFlags);
6465  }
6466  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6467  SDLoc DL(GN);
6468  if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
6469  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
6470  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
6471  return Result;
6472 }
6473 
6474 /// Convert a TLS address reference into the correct sequence of loads
6475 /// and calls to compute the variable's address (for Darwin, currently) and
6476 /// return an SDValue containing the final node.
6477 
6478 /// Darwin only has one TLS scheme which must be capable of dealing with the
6479 /// fully general situation, in the worst case. This means:
6480 /// + "extern __thread" declaration.
6481 /// + Defined in a possibly unknown dynamic library.
6482 ///
6483 /// The general system is that each __thread variable has a [3 x i64] descriptor
6484 /// which contains information used by the runtime to calculate the address. The
6485 /// only part of this the compiler needs to know about is the first xword, which
6486 /// contains a function pointer that must be called with the address of the
6487 /// entire descriptor in "x0".
6488 ///
6489 /// Since this descriptor may be in a different unit, in general even the
6490 /// descriptor must be accessed via an indirect load. The "ideal" code sequence
6491 /// is:
6492 /// adrp x0, _var@TLVPPAGE
6493 /// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
6494 /// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
6495 /// ; the function pointer
6496 /// blr x1 ; Uses descriptor address in x0
6497 /// ; Address of _var is now in x0.
6498 ///
6499 /// If the address of _var's descriptor *is* known to the linker, then it can
6500 /// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
6501 /// a slight efficiency gain.
6502 SDValue
6503 AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
6504  SelectionDAG &DAG) const {
6505  assert(Subtarget->isTargetDarwin() &&
6506  "This function expects a Darwin target");
6507 
6508  SDLoc DL(Op);
6509  MVT PtrVT = getPointerTy(DAG.getDataLayout());
6510  MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
6511  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
6512 
6513  SDValue TLVPAddr =
6514  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6515  SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
6516 
6517  // The first entry in the descriptor is a function pointer that we must call
6518  // to obtain the address of the variable.
6519  SDValue Chain = DAG.getEntryNode();
6520  SDValue FuncTLVGet = DAG.getLoad(
6521  PtrMemVT, DL, Chain, DescAddr,
6522  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
6523  Align(PtrMemVT.getSizeInBits() / 8),
6524  MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
6525  Chain = FuncTLVGet.getValue(1);
6526 
6527  // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
6528  FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
6529 
6530  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6531  MFI.setAdjustsStack(true);
6532 
6533  // TLS calls preserve all registers except those that absolutely must be
6534  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
6535  // silly).
6536  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6537  const uint32_t *Mask = TRI->getTLSCallPreservedMask();
6538  if (Subtarget->hasCustomCallingConv())
6539  TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
6540 
6541  // Finally, we can make the call. This is just a degenerate version of a
6542  // normal AArch64 call node: x0 takes the address of the descriptor, and
6543  // returns the address of the variable in this thread.
6544  Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
6545  Chain =
6546  DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
6547  Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
6548  DAG.getRegisterMask(Mask), Chain.getValue(1));
6549  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
6550 }
6551 
6552 /// Convert a thread-local variable reference into a sequence of instructions to
6553 /// compute the variable's address for the local exec TLS model of ELF targets.
6554 /// The sequence depends on the maximum TLS area size.
6555 SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
6556  SDValue ThreadBase,
6557  const SDLoc &DL,
6558  SelectionDAG &DAG) const {
6559  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6560  SDValue TPOff, Addr;
6561 
6562  switch (DAG.getTarget().Options.TLSSize) {
6563  default:
6564  llvm_unreachable("Unexpected TLS size");
6565 
6566  case 12: {
6567  // mrs x0, TPIDR_EL0
6568  // add x0, x0, :tprel_lo12:a
6569  SDValue Var = DAG.getTargetGlobalAddress(
6570  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
6571  return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
6572  Var,
6573  DAG.getTargetConstant(0, DL, MVT::i32)),
6574  0);
6575  }
6576 
6577  case 24: {
6578  // mrs x0, TPIDR_EL0
6579  // add x0, x0, :tprel_hi12:a
6580  // add x0, x0, :tprel_lo12_nc:a
6581  SDValue HiVar = DAG.getTargetGlobalAddress(
6582  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6583  SDValue LoVar = DAG.getTargetGlobalAddress(
6584  GV, DL, PtrVT, 0,
6585  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6586  Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
6587  HiVar,
6588  DAG.getTargetConstant(0, DL, MVT::i32)),
6589  0);
6590  return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
6591  LoVar,
6592  DAG.getTargetConstant(0, DL, MVT::i32)),
6593  0);
6594  }
6595 
6596  case 32: {
6597  // mrs x1, TPIDR_EL0
6598  // movz x0, #:tprel_g1:a
6599  // movk x0, #:tprel_g0_nc:a
6600  // add x0, x1, x0
6601  SDValue HiVar = DAG.getTargetGlobalAddress(
6602  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
6603  SDValue LoVar = DAG.getTargetGlobalAddress(
6604  GV, DL, PtrVT, 0,
6605  AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
6606  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
6607  DAG.getTargetConstant(16, DL, MVT::i32)),
6608  0);
6609  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
6610  DAG.getTargetConstant(0, DL, MVT::i32)),
6611  0);
6612  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6613  }
6614 
6615  case 48: {
6616  // mrs x1, TPIDR_EL0
6617  // movz x0, #:tprel_g2:a
6618  // movk x0, #:tprel_g1_nc:a
6619  // movk x0, #:tprel_g0_nc:a
6620  // add x0, x1, x0
6621  SDValue HiVar = DAG.getTargetGlobalAddress(
6622  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
6623  SDValue MiVar = DAG.getTargetGlobalAddress(
6624  GV, DL, PtrVT, 0,
6625  AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
6626  SDValue LoVar = DAG.getTargetGlobalAddress(
6627  GV, DL, PtrVT, 0,
6628  AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
6629  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
6630  DAG.getTargetConstant(32, DL, MVT::i32)),
6631  0);
6632  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
6633  DAG.getTargetConstant(16, DL, MVT::i32)),
6634  0);
6635  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
6636  DAG.getTargetConstant(0, DL, MVT::i32)),
6637  0);
6638  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6639  }
6640  }
6641 }
6642 
6643 /// When accessing thread-local variables under either the general-dynamic or
6644 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
6645 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
6646 /// is a function pointer to carry out the resolution.
6647 ///
6648 /// The sequence is:
6649 /// adrp x0, :tlsdesc:var
6650 /// ldr x1, [x0, #:tlsdesc_lo12:var]
6651 /// add x0, x0, #:tlsdesc_lo12:var
6652 /// .tlsdesccall var
6653 /// blr x1
6654 /// (TPIDR_EL0 offset now in x0)
6655 ///
6656 /// The above sequence must be produced unscheduled, to enable the linker to
6657 /// optimize/relax this sequence.
6658 /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
6659 /// above sequence, and expanded really late in the compilation flow, to ensure
6660 /// the sequence is produced as per above.
6661 SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
6662  const SDLoc &DL,
6663  SelectionDAG &DAG) const {
6664  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6665 
6666  SDValue Chain = DAG.getEntryNode();
6667  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6668 
6669  Chain =
6670  DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
6671  SDValue Glue = Chain.getValue(1);
6672 
6673  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
6674 }
6675 
6676 SDValue
6677 AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
6678  SelectionDAG &DAG) const {
6679  assert(Subtarget->isTargetELF() && "This function expects an ELF target");
6680 
6681  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6682 
6683  TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
6684 
6685  if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
6686  if (Model == TLSModel::LocalDynamic)
6687  Model = TLSModel::GeneralDynamic;
6688  }
6689 
6690  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6691  Model != TLSModel::LocalExec)
6692  report_fatal_error("ELF TLS only supported in small memory model or "
6693  "in local exec TLS model");
6694  // Different choices can be made for the maximum size of the TLS area for a
6695  // module. For the small address model, the default TLS size is 16MiB and the
6696  // maximum TLS size is 4GiB.
6697  // FIXME: add tiny and large code model support for TLS access models other
6698  // than local exec. We currently generate the same code as small for tiny,
6699  // which may be larger than needed.
6700 
6701  SDValue TPOff;
6702  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6703  SDLoc DL(Op);
6704  const GlobalValue *GV = GA->getGlobal();
6705 
6706  SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
6707 
6708  if (Model == TLSModel::LocalExec) {
6709  return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
6710  } else if (Model == TLSModel::InitialExec) {
6711  TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6712  TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
6713  } else if (Model == TLSModel::LocalDynamic) {
6714  // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
6715  // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
6716  // the beginning of the module's TLS region, followed by a DTPREL offset
6717  // calculation.
6718 
6719  // These accesses will need deduplicating if there's more than one.
6720  AArch64FunctionInfo *MFI =
6722  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
6723  MFI->incNumLocalDynamicTLSAccesses();
6724  // The call needs a relocation too for linker relaxation. It doesn't make
6725  // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
6726  // the address.
6727  SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
6728  AArch64II::MO_TLS);
6729 
6730  // Now we can calculate the offset from TPIDR_EL0 to this module's
6731  // thread-local area.
6732  TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
6733 
6734  // Now use :dtprel_whatever: operations to calculate this variable's offset
6735  // in its thread-storage area.
6736  SDValue HiVar = DAG.getTargetGlobalAddress(
6737  GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6738  SDValue LoVar = DAG.getTargetGlobalAddress(
6739  GV, DL, MVT::i64, 0,
6740  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6741 
6742  TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
6743  DAG.getTargetConstant(0, DL, MVT::i32)),
6744  0);
6745  TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
6746  DAG.getTargetConstant(0, DL, MVT::i32)),
6747  0);
6748  } else if (Model == TLSModel::GeneralDynamic) {
6749  // The call needs a relocation too for linker relaxation. It doesn't make
6750  // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
6751  // the address.
6752  SDValue SymAddr =
6753  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6754 
6755  // Finally we can make a call to calculate the offset from tpidr_el0.
6756  TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
6757  } else
6758  llvm_unreachable("Unsupported ELF TLS access model");
6759 
6760  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6761 }
6762 
6763 SDValue
6764 AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
6765  SelectionDAG &DAG) const {
6766  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
6767 
6768  SDValue Chain = DAG.getEntryNode();
6769  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6770  SDLoc DL(Op);
6771 
6772  SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
6773 
6774  // Load the ThreadLocalStoragePointer from the TEB
6775  // A pointer to the TLS array is located at offset 0x58 from the TEB.
6776  SDValue TLSArray =
6777  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
6778  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
6779  Chain = TLSArray.getValue(1);
6780 
6781  // Load the TLS index from the C runtime;
6782  // This does the same as getAddr(), but without having a GlobalAddressSDNode.
6783  // This also does the same as LOADgot, but using a generic i32 load,
6784  // while LOADgot only loads i64.
6785  SDValue TLSIndexHi =
6786  DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
6787  SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
6788  "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6789  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
6790  SDValue TLSIndex =
6791  DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
6792  TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
6793  Chain = TLSIndex.getValue(1);
6794 
6795  // The pointer to the thread's TLS data area is at the TLS Index scaled by 8
6796  // offset into the TLSArray.
6797  TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
6798  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
6799  DAG.getConstant(3, DL, PtrVT));
6800  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
6801  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
6802  MachinePointerInfo());
6803  Chain = TLS.getValue(1);
6804 
6805  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6806  const GlobalValue *GV = GA->getGlobal();
6807  SDValue TGAHi = DAG.getTargetGlobalAddress(
6808  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6809  SDValue TGALo = DAG.getTargetGlobalAddress(
6810  GV, DL, PtrVT, 0,
6811  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6812 
6813  // Add the offset from the start of the .tls section (section base).
6814  SDValue Addr =
6815  SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
6816  DAG.getTargetConstant(0, DL, MVT::i32)),
6817  0);
6818  Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
6819  return Addr;
6820 }
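// [Editorial sketch, not from the source; relocation spellings are assumed]
// The Windows lowering above corresponds roughly to:
//   ldr  x8, [x18, #0x58]            ; TLS array pointer from the TEB
//   adrp x9, _tls_index
//   ldr  w9, [x9, :lo12:_tls_index]  ; this module's index into the array
//   ldr  x8, [x8, x9, lsl #3]        ; base of the module's TLS data area
// followed by the two section-relative adds (TGAHi then TGALo) that apply
// the variable's offset within the .tls section.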
6821 
6822 SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
6823  SelectionDAG &DAG) const {
6824  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6825  if (DAG.getTarget().useEmulatedTLS())
6826  return LowerToTLSEmulatedModel(GA, DAG);
6827 
6828  if (Subtarget->isTargetDarwin())
6829  return LowerDarwinGlobalTLSAddress(Op, DAG);
6830  if (Subtarget->isTargetELF())
6831  return LowerELFGlobalTLSAddress(Op, DAG);
6832  if (Subtarget->isTargetWindows())
6833  return LowerWindowsGlobalTLSAddress(Op, DAG);
6834 
6835  llvm_unreachable("Unexpected platform trying to use TLS");
6836 }
6837 
6838 // Looks through \param Val to determine the bit that can be used to
6839 // check the sign of the value. It returns the unextended value and
6840 // the sign bit position.
6841 std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
6842  if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
6843  return {Val.getOperand(0),
6844  cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
6845  1};
6846 
6847  if (Val.getOpcode() == ISD::SIGN_EXTEND)
6848  return {Val.getOperand(0),
6849  Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
6850 
6851  return {Val, Val.getValueSizeInBits() - 1};
6852 }
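// [Editorial example, not from the source] For (sign_extend_inreg i32 %x, i8)
// this returns {%x, 7}: the original i8 value's sign lives in bit 7 of the
// unextended node, so a TB(N)Z on that bit tests the sign without ever
// materializing the extension.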
6853 
6854 SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
6855  SDValue Chain = Op.getOperand(0);
6856  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
6857  SDValue LHS = Op.getOperand(2);
6858  SDValue RHS = Op.getOperand(3);
6859  SDValue Dest = Op.getOperand(4);
6860  SDLoc dl(Op);
6861 
6862  MachineFunction &MF = DAG.getMachineFunction();
6863  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
6864  // will not be produced, as they are conditional branch instructions that do
6865  // not set flags.
6866  bool ProduceNonFlagSettingCondBr =
6867  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
6868 
6869  // Handle f128 first, since lowering it will result in comparing the return
6870  // value of a libcall against zero, which is just what the rest of LowerBR_CC
6871  // is expecting to deal with.
6872  if (LHS.getValueType() == MVT::f128) {
6873  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
6874 
6875  // If softenSetCCOperands returned a scalar, we need to compare the result
6876  // against zero to select between true and false values.
6877  if (!RHS.getNode()) {
6878  RHS = DAG.getConstant(0, dl, LHS.getValueType());
6879  CC = ISD::SETNE;
6880  }
6881  }
6882 
6883  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
6884  // instruction.
6885  if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
6886  (CC == ISD::SETEQ || CC == ISD::SETNE)) {
6887  // Only lower legal XALUO ops.
6888  if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
6889  return SDValue();
6890 
6891  // The actual operation with overflow check.
6892  AArch64CC::CondCode OFCC;
6893  SDValue Value, Overflow;
6894  std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
6895 
6896  if (CC == ISD::SETNE)
6897  OFCC = getInvertedCondCode(OFCC);
6898  SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
6899 
6900  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
6901  Overflow);
6902  }
6903 
6904  if (LHS.getValueType().isInteger()) {
6905  assert((LHS.getValueType() == RHS.getValueType()) &&
6906  (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
6907 
6908  // If the RHS of the comparison is zero, we can potentially fold this
6909  // to a specialized branch.
6910  const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
6911  if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
6912  if (CC == ISD::SETEQ) {
6913  // See if we can use a TBZ to fold in an AND as well.
6914  // TBZ has a smaller branch displacement than CBZ. If the offset is
6915  // out of bounds, a late MI-layer pass rewrites branches.
6916  // 403.gcc is an example that hits this case.
6917  if (LHS.getOpcode() == ISD::AND &&
6918  isa<ConstantSDNode>(LHS.getOperand(1)) &&
6919  isPowerOf2_64(LHS.getConstantOperandVal(1))) {
6920  SDValue Test = LHS.getOperand(0);
6921  uint64_t Mask = LHS.getConstantOperandVal(1);
6922  return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
6923  DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
6924  Dest);
6925  }
6926 
6927  return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
6928  } else if (CC == ISD::SETNE) {
6929  // See if we can use a TBZ to fold in an AND as well.
6930  // TBZ has a smaller branch displacement than CBZ. If the offset is
6931  // out of bounds, a late MI-layer pass rewrites branches.
6932  // 403.gcc is an example that hits this case.
6933  if (LHS.getOpcode() == ISD::AND &&
6934  isa<ConstantSDNode>(LHS.getOperand(1)) &&
6935  isPowerOf2_64(LHS.getConstantOperandVal(1))) {
6936  SDValue Test = LHS.getOperand(0);
6937  uint64_t Mask = LHS.getConstantOperandVal(1);
6938  return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
6939  DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
6940  Dest);
6941  }
6942 
6943  return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
6944  } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
6945  // Don't combine AND since emitComparison converts the AND to an ANDS
6946  // (a.k.a. TST) and the test in the test bit and branch instruction
6947  // becomes redundant. This would also increase register pressure.
6948  uint64_t SignBitPos;
6949  std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
6950  return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
6951  DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
6952  }
6953  }
6954  if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
6955  LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
6956  // Don't combine AND since emitComparison converts the AND to an ANDS
6957  // (a.k.a. TST) and the test in the test bit and branch instruction
6958  // becomes redundant. This would also increase register pressure.
6959  uint64_t SignBitPos;
6960  std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
6961  return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
6962  DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
6963  }
6964 
6965  SDValue CCVal;
6966  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
6967  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
6968  Cmp);
6969  }
6970 
6971  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
6972  LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
6973 
6974  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
6975  // clean. Some of them require two branches to implement.
6976  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
6977  AArch64CC::CondCode CC1, CC2;
6978  changeFPCCToAArch64CC(CC, CC1, CC2);
6979  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
6980  SDValue BR1 =
6981  DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
6982  if (CC2 != AArch64CC::AL) {
6983  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
6984  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
6985  Cmp);
6986  }
6987 
6988  return BR1;
6989 }
6990 
6991 SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
6992  SelectionDAG &DAG) const {
6993  EVT VT = Op.getValueType();
6994  SDLoc DL(Op);
6995 
6996  SDValue In1 = Op.getOperand(0);
6997  SDValue In2 = Op.getOperand(1);
6998  EVT SrcVT = In2.getValueType();
6999 
7000  if (SrcVT.bitsLT(VT))
7001  In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
7002  else if (SrcVT.bitsGT(VT))
7003  In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
7004 
7005  EVT VecVT;
7006  uint64_t EltMask;
7007  SDValue VecVal1, VecVal2;
7008 
7009  auto setVecVal = [&] (int Idx) {
7010  if (!VT.isVector()) {
7011  VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
7012  DAG.getUNDEF(VecVT), In1);
7013  VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
7014  DAG.getUNDEF(VecVT), In2);
7015  } else {
7016  VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
7017  VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
7018  }
7019  };
7020 
7021  if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
7022  VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
7023  EltMask = 0x80000000ULL;
7024  setVecVal(AArch64::ssub);
7025  } else if (VT == MVT::f64 || VT == MVT::v2f64) {
7026  VecVT = MVT::v2i64;
7027 
7028  // We want to materialize a mask with the high bit set, but the AdvSIMD
7029  // immediate moves cannot materialize that in a single instruction for
7030  // 64-bit elements. Instead, materialize zero and then negate it.
7031  EltMask = 0;
7032 
7033  setVecVal(AArch64::dsub);
7034  } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
7035  VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
7036  EltMask = 0x8000ULL;
7037  setVecVal(AArch64::hsub);
7038  } else {
7039  llvm_unreachable("Invalid type for copysign!");
7040  }
7041 
7042  SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
7043 
7044  // If we couldn't materialize the mask above, then the mask vector will be
7045  // the zero vector, and we need to negate it here.
7046  if (VT == MVT::f64 || VT == MVT::v2f64) {
7047  BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
7048  BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
7049  BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
7050  }
7051 
7052  SDValue Sel =
7053  DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
7054 
7055  if (VT == MVT::f16)
7056  return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
7057  if (VT == MVT::f32)
7058  return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
7059  else if (VT == MVT::f64)
7060  return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
7061  else
7062  return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
7063 }
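// [Editorial note, not from the source] AArch64ISD::BIT is a bitwise select:
// with Mask set only in each lane's sign bit, it computes
//   Sel = (VecVal2 & Mask) | (VecVal1 & ~Mask)
// i.e. In2's sign combined with In1's magnitude, which is exactly fcopysign.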
7064 
7065 SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
7066  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
7067  Attribute::NoImplicitFloat))
7068  return SDValue();
7069 
7070  if (!Subtarget->hasNEON())
7071  return SDValue();
7072 
7073  // While there is no integer popcount instruction, it can
7074  // be more efficiently lowered to the following sequence that uses
7075  // AdvSIMD registers/instructions as long as the copies to/from
7076  // the AdvSIMD registers are cheap.
7077  // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
7078  // CNT V0.8B, V0.8B // 8xbyte pop-counts
7079  // ADDV B0, V0.8B // sum 8xbyte pop-counts
7080  // UMOV X0, V0.B[0] // copy byte result back to integer reg
7081  SDValue Val = Op.getOperand(0);
7082  SDLoc DL(Op);
7083  EVT VT = Op.getValueType();
7084 
7085  if (VT == MVT::i32 || VT == MVT::i64) {
7086  if (VT == MVT::i32)
7087  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
7088  Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
7089 
7090  SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
7091  SDValue UaddLV = DAG.getNode(
7092  ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
7093  DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
7094 
7095  if (VT == MVT::i64)
7096  UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
7097  return UaddLV;
7098  } else if (VT == MVT::i128) {
7099  Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);
7100 
7101  SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
7102  SDValue UaddLV = DAG.getNode(
7103  ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
7104  DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
7105 
7106  return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
7107  }
7108 
7109  if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
7110  return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
7111 
7112  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
7113  VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
7114  "Unexpected type for custom ctpop lowering");
7115 
7116  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
7117  Val = DAG.getBitcast(VT8Bit, Val);
7118  Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
7119 
7120  // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
7121  unsigned EltSize = 8;
7122  unsigned NumElts = VT.is64BitVector() ? 8 : 16;
7123  while (EltSize != VT.getScalarSizeInBits()) {
7124  EltSize *= 2;
7125  NumElts /= 2;
7126  MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
7127  Val = DAG.getNode(
7128  ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
7129  DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
7130  }
7131 
7132  return Val;
7133 }
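// [Editorial example, not from the source] For VT = v4i32 the widening loop
// above runs twice:
//   v16i8 (per-byte counts) --uaddlp--> v8i16 --uaddlp--> v4i32
// each UADDLP pairwise-adds adjacent elements into the next-wider element
// type until the element size matches the requested result type.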
7134 
7135 SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
7136  EVT VT = Op.getValueType();
7137  assert(VT.isScalableVector() ||
7138  useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));
7139 
7140  SDLoc DL(Op);
7141  SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
7142  return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
7143 }
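// [Editorial note, not from the source] This relies on the identity
// cttz(x) == ctlz(bitreverse(x)): bit-reversal moves the lowest set bit to
// the highest position, where the natively supported count-leading-zeros
// yields the trailing-zero count.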
7144 
7145 SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
7146  SelectionDAG &DAG) const {
7147  EVT VT = Op.getValueType();
7148 
7149  if (VT.isScalableVector() ||
7150  useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
7151  return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
7152  true);
7153 
7154  SDLoc DL(Op);
7155  SDValue REVB;
7156  MVT VST;
7157 
7158  switch (VT.getSimpleVT().SimpleTy) {
7159  default:
7160  llvm_unreachable("Invalid type for bitreverse!");
7161 
7162  case MVT::v2i32: {
7163  VST = MVT::v8i8;
7164  REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
7165 
7166  break;
7167  }
7168 
7169  case MVT::v4i32: {
7170  VST = MVT::v16i8;
7171  REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
7172 
7173  break;
7174  }
7175 
7176  case MVT::v1i64: {
7177  VST = MVT::v8i8;
7178  REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
7179 
7180  break;
7181  }
7182 
7183  case MVT::v2i64: {
7184  VST = MVT::v16i8;
7185  REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
7186 
7187  break;
7188  }
7189  }
7190 
7191  return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
7192  DAG.getNode(ISD::BITREVERSE, DL, VST, REVB));
7193 }
7194 
7195 SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
7196 
7197  if (Op.getValueType().isVector())
7198  return LowerVSETCC(Op, DAG);
7199 
7200  bool IsStrict = Op->isStrictFPOpcode();
7201  bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
7202  unsigned OpNo = IsStrict ? 1 : 0;
7203  SDValue Chain;
7204  if (IsStrict)
7205  Chain = Op.getOperand(0);
7206  SDValue LHS = Op.getOperand(OpNo + 0);
7207  SDValue RHS = Op.getOperand(OpNo + 1);
7208  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
7209  SDLoc dl(Op);
7210 
7211  // We chose ZeroOrOneBooleanContents, so use zero and one.
7212  EVT VT = Op.getValueType();
7213  SDValue TVal = DAG.getConstant(1, dl, VT);
7214  SDValue FVal = DAG.getConstant(0, dl, VT);
7215 
7216  // Handle f128 first, since one possible outcome is a normal integer
7217  // comparison which gets picked up by the next if statement.
7218  if (LHS.getValueType() == MVT::f128) {
7219  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
7220  IsSignaling);
7221 
7222  // If softenSetCCOperands returned a scalar, use it.
7223  if (!RHS.getNode()) {
7224  assert(LHS.getValueType() == Op.getValueType() &&
7225  "Unexpected setcc expansion!");
7226  return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
7227  }
7228  }
7229 
7230  if (LHS.getValueType().isInteger()) {
7231  SDValue CCVal;
7232  SDValue Cmp = getAArch64Cmp(
7233  LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
7234 
7235  // Note that we inverted the condition above, so we reverse the order of
7236  // the true and false operands here. This will allow the setcc to be
7237  // matched to a single CSINC instruction.
7238  SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
7239  return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
7240  }
7241 
7242  // Now we know we're dealing with FP values.
7243  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
7244  LHS.getValueType() == MVT::f64);
7245 
7246  // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
7247  // and do the comparison.
7248  SDValue Cmp;
7249  if (IsStrict)
7250  Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
7251  else
7252  Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
7253 
7254  AArch64CC::CondCode CC1, CC2;
7255  changeFPCCToAArch64CC(CC, CC1, CC2);
7256  SDValue Res;
7257  if (CC2 == AArch64CC::AL) {
7258  changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
7259  CC2);
7260  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7261 
7262  // Note that we inverted the condition above, so we reverse the order of
7263  // the true and false operands here. This will allow the setcc to be
7264  // matched to a single CSINC instruction.
7265  Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
7266  } else {
7267  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
7268  // totally clean. Some of them require two CSELs to implement. As is in
7269  // this case, we emit the first CSEL and then emit a second using the output
7270  // of the first as the RHS. We're effectively OR'ing the two CC's together.
7271 
7272  // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
7273  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7274  SDValue CS1 =
7275  DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
7276 
7277  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
7278  Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
7279  }
7280  return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
7281 }
7282 
7283 SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
7284  SDValue RHS, SDValue TVal,
7285  SDValue FVal, const SDLoc &dl,
7286  SelectionDAG &DAG) const {
7287  // Handle f128 first, because it will result in a comparison of some RTLIB
7288  // call result against zero.
7289  if (LHS.getValueType() == MVT::f128) {
7290  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
7291 
7292  // If softenSetCCOperands returned a scalar, we need to compare the result
7293  // against zero to select between true and false values.
7294  if (!RHS.getNode()) {
7295  RHS = DAG.getConstant(0, dl, LHS.getValueType());
7296  CC = ISD::SETNE;
7297  }
7298  }
7299 
7300  // Also handle f16, for which we need to do an f32 comparison.
7301  if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
7302  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
7303  RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
7304  }
7305 
7306  // Next, handle integers.
7307  if (LHS.getValueType().isInteger()) {
7308  assert((LHS.getValueType() == RHS.getValueType()) &&
7309  (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
7310 
7311  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
7312  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
7313  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
7314  // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
7315  // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
7316  // supported types.
7317  if (CC == ISD::SETGT && RHSC && RHSC->isAllOnesValue() && CTVal && CFVal &&
7318  CTVal->isOne() && CFVal->isAllOnesValue() &&
7319  LHS.getValueType() == TVal.getValueType()) {
7320  EVT VT = LHS.getValueType();
7321  SDValue Shift =
7322  DAG.getNode(ISD::SRA, dl, VT, LHS,
7323  DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
7324  return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
7325  }
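// [Editorial example, not from the source] For i32, "x > -1 ? 1 : -1"
// becomes (x >> 31) | 1 with an arithmetic shift: non-negative x gives
// 0 | 1 == 1 and negative x gives -1 | 1 == -1, so two cheap instructions
// replace a compare plus select.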
7326 
7327  unsigned Opcode = AArch64ISD::CSEL;
7328 
7329  // If both the TVal and the FVal are constants, see if we can swap them in
7330  // order to form a CSINV or CSINC out of them.
7331  if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
7332  std::swap(TVal, FVal);
7333  std::swap(CTVal, CFVal);
7334  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7335  } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
7336  std::swap(TVal, FVal);
7337  std::swap(CTVal, CFVal);
7338  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7339  } else if (TVal.getOpcode() == ISD::XOR) {
7340  // If TVal is a NOT we want to swap TVal and FVal so that we can match
7341  // with a CSINV rather than a CSEL.
7342  if (isAllOnesConstant(TVal.getOperand(1))) {
7343  std::swap(TVal, FVal);
7344  std::swap(CTVal, CFVal);
7345  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7346  }
7347  } else if (TVal.getOpcode() == ISD::SUB) {
7348  // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
7349  // that we can match with a CSNEG rather than a CSEL.
7350  if (isNullConstant(TVal.getOperand(0))) {
7351  std::swap(TVal, FVal);
7352  std::swap(CTVal, CFVal);
7353  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7354  }
7355  } else if (CTVal && CFVal) {
7356  const int64_t TrueVal = CTVal->getSExtValue();
7357  const int64_t FalseVal = CFVal->getSExtValue();
7358  bool Swap = false;
7359 
7360  // If both TVal and FVal are constants, see if FVal is the
7361  // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
7362  // instead of a CSEL in that case.
7363  if (TrueVal == ~FalseVal) {
7364  Opcode = AArch64ISD::CSINV;
7365  } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
7366  TrueVal == -FalseVal) {
7367  Opcode = AArch64ISD::CSNEG;
7368  } else if (TVal.getValueType() == MVT::i32) {
7369  // If our operands are only 32-bit wide, make sure we use 32-bit
7370  // arithmetic for the check whether we can use CSINC. This ensures that
7371  // the addition in the check will wrap around properly in case there is
7372  // an overflow (which would not be the case if we do the check with
7373  // 64-bit arithmetic).
7374  const uint32_t TrueVal32 = CTVal->getZExtValue();
7375  const uint32_t FalseVal32 = CFVal->getZExtValue();
7376 
7377  if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
7378  Opcode = AArch64ISD::CSINC;
7379 
7380  if (TrueVal32 > FalseVal32) {
7381  Swap = true;
7382  }
7383  }
7384  // 64-bit check whether we can use CSINC.
7385  } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
7386  Opcode = AArch64ISD::CSINC;
7387 
7388  if (TrueVal > FalseVal) {
7389  Swap = true;
7390  }
7391  }
7392 
7393  // Swap TVal and FVal if necessary.
7394  if (Swap) {
7395  std::swap(TVal, FVal);
7396  std::swap(CTVal, CFVal);
7397  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7398  }
7399 
7400  if (Opcode != AArch64ISD::CSEL) {
7401  // Drop FVal since we can get its value by simply inverting/negating
7402  // TVal.
7403  FVal = TVal;
7404  }
7405  }
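// [Editorial example, not from the source] Why the width of the check
// matters: with i32 constants TrueVal32 = 0x80000000 and
// FalseVal32 = 0x7fffffff, the 32-bit comparison FalseVal32 + 1 == TrueVal32
// holds and CSINC applies; after sign-extension to 64 bits,
// 0x7fffffff + 1 != 0xffffffff80000000, so the fold would be missed.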
7406 
7407  // Avoid materializing a constant when possible by reusing a known value in
7408  // a register. However, don't perform this optimization if the known value
7409  // is one, zero or negative one in the case of a CSEL. We can always
7410  // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
7411  // FVal, respectively.
7412  ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
7413  if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
7414  !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
7415  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
7416  // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
7417  // "a != C ? x : a" to avoid materializing C.
7418  if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
7419  TVal = LHS;
7420  else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
7421  FVal = LHS;
7422  } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
7423  assert (CTVal && CFVal && "Expected constant operands for CSNEG.");
7424  // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
7425  // avoid materializing C.
7426  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
7427  if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
7428  Opcode = AArch64ISD::CSINV;
7429  TVal = LHS;
7430  FVal = DAG.getConstant(0, dl, FVal.getValueType());
7431  }
7432  }
7433 
7434  SDValue CCVal;
7435  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
7436  EVT VT = TVal.getValueType();
7437  return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
7438  }
7439 
7440  // Now we know we're dealing with FP values.
7441  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
7442  LHS.getValueType() == MVT::f64);
7443  assert(LHS.getValueType() == RHS.getValueType());
7444  EVT VT = TVal.getValueType();
7445  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
7446 
7447  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
7448  // clean. Some of them require two CSELs to implement.
7449  AArch64CC::CondCode CC1, CC2;
7450  changeFPCCToAArch64CC(CC, CC1, CC2);
7451 
7452  if (DAG.getTarget().Options.UnsafeFPMath) {
7453  // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
7454  // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
7455  ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
7456  if (RHSVal && RHSVal->isZero()) {
7457  ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
7458  ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
7459 
7460  if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
7461  CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
7462  TVal = LHS;
7463  else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
7464  CFVal && CFVal->isZero() &&
7465  FVal.getValueType() == LHS.getValueType())
7466  FVal = LHS;
7467  }
7468  }
7469 
7470  // Emit first, and possibly only, CSEL.
7471  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7472  SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
7473 
7474  // If we need a second CSEL, emit it, using the output of the first as the
7475  // RHS. We're effectively OR'ing the two CC's together.
7476  if (CC2 != AArch64CC::AL) {
7477  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
7478  return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
7479  }
7480 
7481  // Otherwise, return the output of the first CSEL.
7482  return CS1;
7483 }
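// A sketch of typical codegen from the selection logic above (illustrative,
// assumed outputs rather than guaranteed ones):
//   (a == 0) ? 5 : 4   ->  mov w8, #4; cmp w0, #0; csinc w0, w8, w8, ne
//   (a == 0) ? -1 : 0  ->  cmp w0, #0; csetm w0, eq   // CSINV with wzr
//   (a == 0) ? -x : x  ->  cmp w0, #0; csneg w0, w1, w1, ne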
7484 
7485 SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
7486  SelectionDAG &DAG) const {
7487 
7488  EVT Ty = Op.getValueType();
7489  auto Idx = Op.getConstantOperandAPInt(2);
7490  if (Idx.sge(-1) && Idx.slt(Ty.getVectorMinNumElements()))
7491  return Op;
7492  return SDValue();
7493 }
7494 
7495 SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
7496  SelectionDAG &DAG) const {
7497  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7498  SDValue LHS = Op.getOperand(0);
7499  SDValue RHS = Op.getOperand(1);
7500  SDValue TVal = Op.getOperand(2);
7501  SDValue FVal = Op.getOperand(3);
7502  SDLoc DL(Op);
7503  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
7504 }
7505 
7506 SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
7507  SelectionDAG &DAG) const {
7508  SDValue CCVal = Op->getOperand(0);
7509  SDValue TVal = Op->getOperand(1);
7510  SDValue FVal = Op->getOperand(2);
7511  SDLoc DL(Op);
7512 
7513  EVT Ty = Op.getValueType();
7514  if (Ty.isScalableVector()) {
7515  SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
7516  MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
7517  SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
7518  return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
7519  }
7520 
7521  if (useSVEForFixedLengthVectorVT(Ty)) {
7522  // FIXME: Ideally this would be the same as above using i1 types, however
7523  // for the moment we can't deal with fixed i1 vector types properly, so
7524  // instead extend the predicate to a result type sized integer vector.
7525  MVT SplatValVT = MVT::getIntegerVT(Ty.getScalarSizeInBits());
7526  MVT PredVT = MVT::getVectorVT(SplatValVT, Ty.getVectorElementCount());
7527  SDValue SplatVal = DAG.getSExtOrTrunc(CCVal, DL, SplatValVT);
7528  SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, SplatVal);
7529  return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
7530  }
7531 
7532  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
7533  // instruction.
7534  if (ISD::isOverflowIntrOpRes(CCVal)) {
7535  // Only lower legal XALUO ops.
7536  if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
7537  return SDValue();
7538 
7539  AArch64CC::CondCode OFCC;
7540  SDValue Value, Overflow;
7541  std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
7542  SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
7543 
7544  return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
7545  CCVal, Overflow);
7546  }
7547 
7548  // Lower it the same way as we would lower a SELECT_CC node.
7549  ISD::CondCode CC;
7550  SDValue LHS, RHS;
7551  if (CCVal.getOpcode() == ISD::SETCC) {
7552  LHS = CCVal.getOperand(0);
7553  RHS = CCVal.getOperand(1);
7554  CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
7555  } else {
7556  LHS = CCVal;
7557  RHS = DAG.getConstant(0, DL, CCVal.getValueType());
7558  CC = ISD::SETNE;
7559  }
7560  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
7561 }
7562 
7563 SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
7564  SelectionDAG &DAG) const {
7565  // Jump table entries are emitted as PC-relative offsets. No additional
7566  // tweaking is necessary here. Just get the address of the jump table.
7567  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
7568 
7569  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
7570  !Subtarget->isTargetMachO()) {
7571  return getAddrLarge(JT, DAG);
7572  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
7573  return getAddrTiny(JT, DAG);
7574  }
7575  return getAddr(JT, DAG);
7576 }
7577 
7578 SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
7579  SelectionDAG &DAG) const {
7580  // Jump table entries are emitted as PC-relative offsets. No additional
7581  // tweaking is necessary here. Just get the address of the jump table.
7582  SDLoc DL(Op);
7583  SDValue JT = Op.getOperand(1);
7584  SDValue Entry = Op.getOperand(2);
7585  int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
7586 
7587  auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7588  AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
7589 
7590  SDNode *Dest =
7591  DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
7592  Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
7593  return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
7594  SDValue(Dest, 0));
7595 }
7596 
7597 SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
7598  SelectionDAG &DAG) const {
7599  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
7600 
7601  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
7602  // Use the GOT for the large code model on iOS.
7603  if (Subtarget->isTargetMachO()) {
7604  return getGOT(CP, DAG);
7605  }
7606  return getAddrLarge(CP, DAG);
7607  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
7608  return getAddrTiny(CP, DAG);
7609  } else {
7610  return getAddr(CP, DAG);
7611  }
7612 }
7613 
7614 SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
7615  SelectionDAG &DAG) const {
7616  BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
7617  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
7618  !Subtarget->isTargetMachO()) {
7619  return getAddrLarge(BA, DAG);
7620  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
7621  return getAddrTiny(BA, DAG);
7622  }
7623  return getAddr(BA, DAG);
7624 }
7625 
7626 SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
7627  SelectionDAG &DAG) const {
7628  AArch64FunctionInfo *FuncInfo =
7629  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7630 
7631  SDLoc DL(Op);
7632  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
7633  getPointerTy(DAG.getDataLayout()));
7634  FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
7635  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7636  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
7637  MachinePointerInfo(SV));
7638 }
7639 
7640 SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
7641  SelectionDAG &DAG) const {
7642  AArch64FunctionInfo *FuncInfo =
7643  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7644 
7645  SDLoc DL(Op);
7646  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
7647  ? FuncInfo->getVarArgsGPRIndex()
7648  : FuncInfo->getVarArgsStackIndex(),
7649  getPointerTy(DAG.getDataLayout()));
7650  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7651  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
7652  MachinePointerInfo(SV));
7653 }
7654 
7655 SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
7656  SelectionDAG &DAG) const {
7657  // The layout of the va_list struct is specified in the AArch64 Procedure Call
7658  // Standard, section B.3.
7659  MachineFunction &MF = DAG.getMachineFunction();
7660  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
7661  unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
7662  auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
7663  auto PtrVT = getPointerTy(DAG.getDataLayout());
7664  SDLoc DL(Op);
7665 
7666  SDValue Chain = Op.getOperand(0);
7667  SDValue VAList = Op.getOperand(1);
7668  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7669  SmallVector<SDValue, 4> MemOps;
7670 
7671  // void *__stack at offset 0
7672  unsigned Offset = 0;
7673  SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
7674  Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
7675  MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
7676  MachinePointerInfo(SV), Align(PtrSize)));
7677 
7678  // void *__gr_top at offset 8 (4 on ILP32)
7679  Offset += PtrSize;
7680  int GPRSize = FuncInfo->getVarArgsGPRSize();
7681  if (GPRSize > 0) {
7682  SDValue GRTop, GRTopAddr;
7683 
7684  GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7685  DAG.getConstant(Offset, DL, PtrVT));
7686 
7687  GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
7688  GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
7689  DAG.getConstant(GPRSize, DL, PtrVT));
7690  GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
7691 
7692  MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
7693  MachinePointerInfo(SV, Offset),
7694  Align(PtrSize)));
7695  }
7696 
7697  // void *__vr_top at offset 16 (8 on ILP32)
7698  Offset += PtrSize;
7699  int FPRSize = FuncInfo->getVarArgsFPRSize();
7700  if (FPRSize > 0) {
7701  SDValue VRTop, VRTopAddr;
7702  VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7703  DAG.getConstant(Offset, DL, PtrVT));
7704 
7705  VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
7706  VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
7707  DAG.getConstant(FPRSize, DL, PtrVT));
7708  VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
7709 
7710  MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
7711  MachinePointerInfo(SV, Offset),
7712  Align(PtrSize)));
7713  }
7714 
7715  // int __gr_offs at offset 24 (12 on ILP32)
7716  Offset += PtrSize;
7717  SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7718  DAG.getConstant(Offset, DL, PtrVT));
7719  MemOps.push_back(
7720  DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
7721  GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
7722 
7723  // int __vr_offs at offset 28 (16 on ILP32)
7724  Offset += 4;
7725  SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7726  DAG.getConstant(Offset, DL, PtrVT));
7727  MemOps.push_back(
7728  DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
7729  VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
7730 
7731  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
7732 }
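// For reference, the stores above populate the AAPCS64 (section B.3) va_list
// layout; the pointer-field offsets halve on ILP32, as the comments note:
//   struct va_list {
//     void *__stack;    // offset 0:  next stacked argument
//     void *__gr_top;   // offset 8:  end of the GP register save area
//     void *__vr_top;   // offset 16: end of the FP/SIMD register save area
//     int   __gr_offs;  // offset 24: negative offset from __gr_top
//     int   __vr_offs;  // offset 28: negative offset from __vr_top
//   };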
7733 
7734 SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
7735  SelectionDAG &DAG) const {
7736  MachineFunction &MF = DAG.getMachineFunction();
7737 
7738  if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
7739  return LowerWin64_VASTART(Op, DAG);
7740  else if (Subtarget->isTargetDarwin())
7741  return LowerDarwin_VASTART(Op, DAG);
7742  else
7743  return LowerAAPCS_VASTART(Op, DAG);
7744 }
7745 
7746 SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
7747  SelectionDAG &DAG) const {
7748  // AAPCS has three pointers and two ints (= 32 bytes); Darwin has a single
7749  // pointer.
7750  SDLoc DL(Op);
7751  unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
7752  unsigned VaListSize =
7753  (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
7754  ? PtrSize
7755  : Subtarget->isTargetILP32() ? 20 : 32;
7756  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
7757  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7758 
7759  return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
7760  DAG.getConstant(VaListSize, DL, MVT::i32),
7761  Align(PtrSize), false, false, false,
7762  MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
7763 }
7764 
7765 SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
7766  assert(Subtarget->isTargetDarwin() &&
7767  "automatic va_arg instruction only works on Darwin");
7768 
7769  const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7770  EVT VT = Op.getValueType();
7771  SDLoc DL(Op);
7772  SDValue Chain = Op.getOperand(0);
7773  SDValue Addr = Op.getOperand(1);
7774  MaybeAlign Align(Op.getConstantOperandVal(3));
7775  unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
7776  auto PtrVT = getPointerTy(DAG.getDataLayout());
7777  auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
7778  SDValue VAList =
7779  DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
7780  Chain = VAList.getValue(1);
7781  VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
7782 
7783  if (VT.isScalableVector())
7784  report_fatal_error("Passing SVE types to variadic functions is "
7785  "currently not supported");
7786 
7787  if (Align && *Align > MinSlotSize) {
7788  VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7789  DAG.getConstant(Align->value() - 1, DL, PtrVT));
7790  VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
7791  DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
7792  }
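  // Illustrative: the ADD/AND pair above rounds the cursor up to the requested
  // alignment, e.g. for a 16-byte aligned slot VAList = (VAList + 15) & ~15.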
7793 
7794  Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
7795  unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
7796 
7797  // Scalar integer and FP values smaller than 64 bits are implicitly extended
7798  // up to 64 bits. At the very least, we have to increase the striding of the
7799  // vaargs list to match this, and for FP values we need to introduce
7800  // FP_ROUND nodes as well.
7801  if (VT.isInteger() && !VT.isVector())
7802  ArgSize = std::max(ArgSize, MinSlotSize);
7803  bool NeedFPTrunc = false;
7804  if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
7805  ArgSize = 8;
7806  NeedFPTrunc = true;
7807  }
7808 
7809  // Increment the pointer, VAList, to the next vaarg
7810  SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7811  DAG.getConstant(ArgSize, DL, PtrVT));
7812  VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
7813 
7814  // Store the incremented VAList to the legalized pointer
7815  SDValue APStore =
7816  DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
7817 
7818  // Load the actual argument out of the pointer VAList
7819  if (NeedFPTrunc) {
7820  // Load the value as an f64.
7821  SDValue WideFP =
7822  DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
7823  // Round the value down to an f32.
7824  SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
7825  DAG.getIntPtrConstant(1, DL));
7826  SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
7827  // Merge the rounded value with the chain output of the load.
7828  return DAG.getMergeValues(Ops, DL);
7829  }
7830 
7831  return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
7832 }
7833 
7834 SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
7835  SelectionDAG &DAG) const {
7836  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7837  MFI.setFrameAddressIsTaken(true);
7838 
7839  EVT VT = Op.getValueType();
7840  SDLoc DL(Op);
7841  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7842  SDValue FrameAddr =
7843  DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
7844  while (Depth--)
7845  FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
7846  MachinePointerInfo());
7847 
7848  if (Subtarget->isTargetILP32())
7849  FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
7850  DAG.getValueType(VT));
7851 
7852  return FrameAddr;
7853 }
7854 
7855 SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
7856  SelectionDAG &DAG) const {
7857  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7858 
7859  EVT VT = getPointerTy(DAG.getDataLayout());
7860  SDLoc DL(Op);
7861  int FI = MFI.CreateFixedObject(4, 0, false);
7862  return DAG.getFrameIndex(FI, VT);
7863 }
7864 
7865 #define GET_REGISTER_MATCHER
7866 #include "AArch64GenAsmMatcher.inc"
7867 
7868 // FIXME? Maybe this could be a TableGen attribute on some registers and
7869 // this table could be generated automatically from RegInfo.
7870 Register AArch64TargetLowering::
7871 getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
7872  Register Reg = MatchRegisterName(RegName);
7873  if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
7874  const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
7875  unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
7876  if (!Subtarget->isXRegisterReserved(DwarfRegNum))
7877  Reg = 0;
7878  }
7879  if (Reg)
7880  return Reg;
7881  report_fatal_error(Twine("Invalid register name \""
7882  + StringRef(RegName) + "\"."));
7883 }
7884 
7885 SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
7886  SelectionDAG &DAG) const {
7887  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
7888 
7889  EVT VT = Op.getValueType();
7890  SDLoc DL(Op);
7891 
7892  SDValue FrameAddr =
7893  DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
7894  SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
7895 
7896  return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
7897 }
7898 
7899 SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
7900  SelectionDAG &DAG) const {
7901  MachineFunction &MF = DAG.getMachineFunction();
7902  MachineFrameInfo &MFI = MF.getFrameInfo();
7903  MFI.setReturnAddressIsTaken(true);
7904 
7905  EVT VT = Op.getValueType();
7906  SDLoc DL(Op);
7907  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7908  SDValue ReturnAddress;
7909  if (Depth) {
7910  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
7911  SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
7912  ReturnAddress = DAG.getLoad(
7913  VT, DL, DAG.getEntryNode(),
7914  DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
7915  } else {
7916  // Return LR, which contains the return address. Mark it an implicit
7917  // live-in.
7918  unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
7919  ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
7920  }
7921 
7922  // The XPACLRI instruction assembles to a hint-space instruction before
7923  // Armv8.3-A, and can therefore be used safely on any pre-Armv8.3-A
7924  // architecture. On Armv8.3-A and onwards, XPACI is available, so use
7925  // that instead.
7926  SDNode *St;
7927  if (Subtarget->hasPAuth()) {
7928  St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
7929  } else {
7930  // XPACLRI operates on LR therefore we must move the operand accordingly.
7931  SDValue Chain =
7932  DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
7933  St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
7934  }
7935  return SDValue(St, 0);
7936 }
7937 
7938 /// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which return two
7939 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
7940 SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
7941  SelectionDAG &DAG) const {
7942  SDValue Lo, Hi;
7943  expandShiftParts(Op.getNode(), Lo, Hi, DAG);
7944  return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
7945 }
7946 
7947 bool AArch64TargetLowering::isOffsetFoldingLegal(
7948  const GlobalAddressSDNode *GA) const {
7949  // Offsets are folded in the DAG combine rather than here so that we can
7950  // intelligently choose an offset based on the uses.
7951  return false;
7952 }
7953 
7954 bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
7955  bool OptForSize) const {
7956  bool IsLegal = false;
7957  // We can materialize #0.0 as fmov $Rd, XZR for the 64-bit and 32-bit cases,
7958  // and for the 16-bit case when the target has full fp16 support.
7959  // FIXME: We should be able to handle f128 as well with a clever lowering.
7960  const APInt ImmInt = Imm.bitcastToAPInt();
7961  if (VT == MVT::f64)
7962  IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
7963  else if (VT == MVT::f32)
7964  IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
7965  else if (VT == MVT::f16 && Subtarget->hasFullFP16())
7966  IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
7967  // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
7968  // generate that fmov.
7969 
7970  // If we cannot materialize the immediate in the fmov field, check if the
7971  // value can be encoded as the immediate operand of a logical instruction.
7972  // The immediate value will be created with either MOVZ, MOVN, or ORR.
7973  if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
7974  // The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
7975  // however the mov+fmov sequence is always better because of the reduced
7976  // cache pressure. The timings are still the same if you consider
7977  // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
7978  // movw+movk is fused). So we limit up to 2 instructions at most.
7979  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
7980  AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
7981  Insn);
7982  unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
7983  IsLegal = Insn.size() <= Limit;
7984  }
7985 
7986  LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
7987  << " imm value: "; Imm.dump(););
7988  return IsLegal;
7989 }
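// Illustrative (assumed summary of the FMOV immediate encoding checked above):
// legal immediates have the form (-1)^s * (n/16) * 2^r with 16 <= n <= 31 and
// -3 <= r <= 4, so values such as 0.125, 0.5, 1.0, 2.0 and 31.0 are encodable,
// while e.g. 0.1 must instead be built with mov+fmov or loaded from a literal.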
7990 
7991 //===----------------------------------------------------------------------===//
7992 // AArch64 Optimization Hooks
7993 //===----------------------------------------------------------------------===//
7994 
7995 static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
7996  SDValue Operand, SelectionDAG &DAG,
7997  int &ExtraSteps) {
7998  EVT VT = Operand.getValueType();
7999  if (ST->hasNEON() &&
8000  (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
8001  VT == MVT::f32 || VT == MVT::v1f32 ||
8002  VT == MVT::v2f32 || VT == MVT::v4f32)) {
8003  if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
8004  // For the reciprocal estimates, convergence is quadratic, so the number
8005  // of digits is doubled after each iteration. In ARMv8, the accuracy of
8006  // the initial estimate is 2^-8. Thus the number of extra steps to refine
8007  // the result for float (23 mantissa bits) is 2 and for double (52
8008  // mantissa bits) is 3.
8009  ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
8010 
8011  return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
8012  }
8013 
8014  return SDValue();
8015 }
8016 
8017 SDValue
8018 AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
8019  const DenormalMode &Mode) const {
8020  SDLoc DL(Op);
8021  EVT VT = Op.getValueType();
8022  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8023  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
8024  return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
8025 }
8026 
8027 SDValue
8028 AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
8029  SelectionDAG &DAG) const {
8030  return Op;
8031 }
8032 
8033 SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
8034  SelectionDAG &DAG, int Enabled,
8035  int &ExtraSteps,
8036  bool &UseOneConst,
8037  bool Reciprocal) const {
8038  if (Enabled == ReciprocalEstimate::Enabled ||
8039  (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
8040  if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
8041  DAG, ExtraSteps)) {
8042  SDLoc DL(Operand);
8043  EVT VT = Operand.getValueType();
8044 
8045  SDNodeFlags Flags;
8046  Flags.setAllowReassociation(true);
8047 
8048  // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
8049  // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
8050  for (int i = ExtraSteps; i > 0; --i) {
8051  SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
8052  Flags);
8053  Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
8054  Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
8055  }
8056  if (!Reciprocal)
8057  Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
8058 
8059  ExtraSteps = 0;
8060  return Estimate;
8061  }
8062 
8063  return SDValue();
8064 }
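// Worked form of the loop above (illustrative): with input d and estimate
// e ~= 1/sqrt(d), one Newton step computes
//   e' = e * (3 - d*e*e) / 2
// where FRSQRTS(a, b) = (3 - a*b) / 2, so Step = FRSQRTS(d, e*e) and
// e' = e * Step; each step roughly doubles the ~8 correct bits of FRSQRTE.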
8065 
8066 SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
8067  SelectionDAG &DAG, int Enabled,
8068  int &ExtraSteps) const {
8069  if (Enabled == ReciprocalEstimate::Enabled)
8070  if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
8071  DAG, ExtraSteps)) {
8072  SDLoc DL(Operand);
8073  EVT VT = Operand.getValueType();
8074 
8075  SDNodeFlags Flags;
8076  Flags.setAllowReassociation(true);
8077 
8078  // Newton reciprocal iteration: E * (2 - X * E)
8079  // AArch64 reciprocal iteration instruction: (2 - M * N)
8080  for (int i = ExtraSteps; i > 0; --i) {
8081  SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
8082  Estimate, Flags);
8083  Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
8084  }
8085 
8086  ExtraSteps = 0;
8087  return Estimate;
8088  }
8089 
8090  return SDValue();
8091 }
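// Illustrative: with input d and estimate e ~= 1/d, each step above computes
//   e' = e * (2 - d*e)
// using FRECPS(a, b) = 2 - a*b, again roughly doubling the accurate bits of
// the initial FRECPE estimate per iteration.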
8092 
8093 //===----------------------------------------------------------------------===//
8094 // AArch64 Inline Assembly Support
8095 //===----------------------------------------------------------------------===//
8096 
8097 // Table of Constraints
8098 // TODO: This is the current set of constraints supported by ARM for the
8099 // compiler; not all of them may make sense.
8100 //
8101 // r - A general register
8102 // w - An FP/SIMD register of some size in the range v0-v31
8103 // x - An FP/SIMD register of some size in the range v0-v15
8104 // I - Constant that can be used with an ADD instruction
8105 // J - Constant that can be used with a SUB instruction
8106 // K - Constant that can be used with a 32-bit logical instruction
8107 // L - Constant that can be used with a 64-bit logical instruction
8108 // M - Constant that can be used as a 32-bit MOV immediate
8109 // N - Constant that can be used as a 64-bit MOV immediate
8110 // Q - A memory reference with base register and no offset
8111 // S - A symbolic address
8112 // Y - Floating point constant zero
8113 // Z - Integer constant zero
8114 //
8115 // Note that general register operands will be output using their 64-bit x
8116 // register name, whatever the size of the variable, unless the asm operand
8117 // is prefixed by the %w modifier. Floating-point and SIMD register operands
8118 // will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
8119 // %q modifier.
8120 const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
8121  // At this point, we have to lower this constraint to something else, so we
8122  // lower it to an "r" or "w". However, by doing this we will force the result
8123  // to be in register, while the X constraint is much more permissive.
8124  //
8125  // Although we are correct (we are free to emit anything, without
8126  // constraints), we might break use cases that would expect us to be more
8127  // efficient and emit something else.
8128  if (!Subtarget->hasFPARMv8())
8129  return "r";
8130 
8131  if (ConstraintVT.isFloatingPoint())
8132  return "w";
8133 
8134  if (ConstraintVT.isVector() &&
8135  (ConstraintVT.getSizeInBits() == 64 ||
8136  ConstraintVT.getSizeInBits() == 128))
8137  return "w";
8138 
8139  return "r";
8140 }
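// Example uses of the constraints listed above (illustrative, assumed user
// code; 'res', 'a', 'f', 'x', 'y' are hypothetical variables):
//   asm("add %w0, %w1, %2" : "=r"(res) : "r"(a), "I"(4095));  // ADD immediate
//   asm("fadd %s0, %s1, %s2" : "=w"(f) : "w"(x), "w"(y));     // FP register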
8141 
8142 enum PredicateConstraint {
8143  Upl,
8144  Upa,
8145  Invalid
8146 };
8147 
8148 static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
8149  PredicateConstraint P = PredicateConstraint::Invalid;
8150  if (Constraint == "Upa")
8151  P = PredicateConstraint::Upa;
8152  if (Constraint == "Upl")
8153  P = PredicateConstraint::Upl;
8154  return P;
8155 }
8156 
8157 /// getConstraintType - Given a constraint letter, return the type of
8158 /// constraint it is for this target.
8160 AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
8161  if (Constraint.size() == 1) {
8162  switch (Constraint[0]) {
8163  default:
8164  break;
8165  case 'x':
8166  case 'w':
8167  case 'y':
8168  return C_RegisterClass;
8169  // An address with a single base register. Due to the way we
8170  // currently handle addresses it is the same as 'r'.
8171  case 'Q':
8172  return C_Memory;
8173  case 'I':
8174  case 'J':
8175  case 'K':
8176  case 'L':
8177  case 'M':
8178  case 'N':
8179  case 'Y':
8180  case 'Z':
8181  return C_Immediate;
8182  case 'z':
8183  case 'S': // A symbolic address
8184  return C_Other;
8185  }
8186  } else if (parsePredicateConstraint(Constraint) !=
8187  PredicateConstraint::Invalid)
8188  return C_RegisterClass;
8189  return TargetLowering::getConstraintType(Constraint);
8190 }
8191 
8192 /// Examine constraint type and operand type and determine a weight value.
8193 /// This object must already have been set up with the operand type
8194 /// and the current alternative constraint selected.
8196 AArch64TargetLowering::getSingleConstraintMatchWeight(
8197  AsmOperandInfo &info, const char *constraint) const {
8198  ConstraintWeight weight = CW_Invalid;
8199  Value *CallOperandVal = info.CallOperandVal;
8200  // If we don't have a value, we can't do a match,
8201  // but allow it at the lowest weight.
8202  if (!CallOperandVal)
8203  return CW_Default;
8204  Type *type = CallOperandVal->getType();
8205  // Look at the constraint type.
8206  switch (*constraint) {
8207  default:
8208  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
8209  break;
8210  case 'x':
8211  case 'w':
8212  case 'y':
8213  if (type->isFloatingPointTy() || type->isVectorTy())
8214  weight = CW_Register;
8215  break;
8216  case 'z':
8217  weight = CW_Constant;
8218  break;
8219  case 'U':
8220  if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
8221  weight = CW_Register;
8222  break;
8223  }
8224  return weight;
8225 }
8226 
8227 std::pair<unsigned, const TargetRegisterClass *>
8228 AArch64TargetLowering::getRegForInlineAsmConstraint(
8229  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8230  if (Constraint.size() == 1) {
8231  switch (Constraint[0]) {
8232  case 'r':
8233  if (VT.isScalableVector())
8234  return std::make_pair(0U, nullptr);
8235  if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
8236  return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
8237  if (VT.getFixedSizeInBits() == 64)
8238  return std::make_pair(0U, &AArch64::GPR64commonRegClass);
8239  return std::make_pair(0U, &AArch64::GPR32commonRegClass);
8240  case 'w': {
8241  if (!Subtarget->hasFPARMv8())
8242  break;
8243  if (VT.isScalableVector()) {
8244  if (VT.getVectorElementType() != MVT::i1)
8245  return std::make_pair(0U, &AArch64::ZPRRegClass);
8246  return std::make_pair(0U, nullptr);
8247  }
8248  uint64_t VTSize = VT.getFixedSizeInBits();
8249  if (VTSize == 16)
8250  return std::make_pair(0U, &AArch64::FPR16RegClass);
8251  if (VTSize == 32)
8252  return std::make_pair(0U, &AArch64::FPR32RegClass);
8253  if (VTSize == 64)
8254  return std::make_pair(0U, &AArch64::FPR64RegClass);
8255  if (VTSize == 128)
8256  return std::make_pair(0U, &AArch64::FPR128RegClass);
8257  break;
8258  }
8259  // The instructions that this constraint is designed for can
8260  // only take 128-bit registers so just use that regclass.
8261  case 'x':
8262  if (!Subtarget->hasFPARMv8())
8263  break;
8264  if (VT.isScalableVector())
8265  return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
8266  if (VT.getSizeInBits() == 128)
8267  return std::make_pair(0U, &AArch64::FPR128_loRegClass);
8268  break;
8269  case 'y':
8270  if (!Subtarget->hasFPARMv8())
8271  break;
8272  if (VT.isScalableVector())
8273  return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
8274  break;
8275  }
8276  } else {
8277  PredicateConstraint PC = parsePredicateConstraint(Constraint);
8278  if (PC != PredicateConstraint::Invalid) {
8279  if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
8280  return std::make_pair(0U, nullptr);
8281  bool restricted = (PC == PredicateConstraint::Upl);
8282  return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
8283  : std::make_pair(0U, &AArch64::PPRRegClass);
8284  }
8285  }
8286  if (StringRef("{cc}").equals_insensitive(Constraint))
8287  return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
8288 
8289  // Use the default implementation in TargetLowering to convert the register
8290  // constraint into a member of a register class.
8291  std::pair<unsigned, const TargetRegisterClass *> Res;
8292  Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8293 
8294  // Not found as a standard register?
8295  if (!Res.second) {
8296  unsigned Size = Constraint.size();
8297  if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
8298  tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
8299  int RegNo;
8300  bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
8301  if (!Failed && RegNo >= 0 && RegNo <= 31) {
8302  // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
8303  // By default we'll emit v0-v31 for this unless there's a modifier where
8304  // we'll emit the correct register as well.
8305  if (VT != MVT::Other && VT.getSizeInBits() == 64) {
8306  Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
8307  Res.second = &AArch64::FPR64RegClass;
8308  } else {
8309  Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
8310  Res.second = &AArch64::FPR128RegClass;
8311  }
8312  }
8313  }
8314  }
8315 
8316  if (Res.second && !Subtarget->hasFPARMv8() &&
8317  !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
8318  !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
8319  return std::make_pair(0U, nullptr);
8320 
8321  return Res;
8322 }
8323 
8324 EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL,
8325  llvm::Type *Ty,
8326  bool AllowUnknown) const {
8327  if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
8328  return EVT(MVT::i64x8);
8329 
8330  return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown);
8331 }
8332 
8333 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
8334 /// vector. If it is invalid, don't add anything to Ops.
8335 void AArch64TargetLowering::LowerAsmOperandForConstraint(
8336  SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
8337  SelectionDAG &DAG) const {
8338  SDValue Result;
8339 
8340  // Currently only support length 1 constraints.
8341  if (Constraint.length() != 1)
8342  return;
8343 
8344  char ConstraintLetter = Constraint[0];
8345  switch (ConstraintLetter) {
8346  default:
8347  break;
8348 
8349  // This set of constraints deals with valid constants for various instructions.
8350  // Validate and return a target constant for them if we can.
8351  case 'z': {
8352  // 'z' maps to xzr or wzr so it needs an input of 0.
8353  if (!isNullConstant(Op))
8354  return;
8355 
8356  if (Op.getValueType() == MVT::i64)
8357  Result = DAG.getRegister(AArch64::XZR, MVT::i64);
8358  else
8359  Result = DAG.getRegister(AArch64::WZR, MVT::i32);
8360  break;
8361  }
8362  case 'S': {
8363  // An absolute symbolic address or label reference.
8364  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
8365  Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
8366  GA->getValueType(0));
8367  } else if (const BlockAddressSDNode *BA =
8368  dyn_cast<BlockAddressSDNode>(Op)) {
8369  Result =
8370  DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
8371  } else
8372  return;
8373  break;
8374  }
8375 
8376  case 'I':
8377  case 'J':
8378  case 'K':
8379  case 'L':
8380  case 'M':
8381  case 'N':
8382  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
8383  if (!C)
8384  return;
8385 
8386  // Grab the value and do some validation.
8387  uint64_t CVal = C->getZExtValue();
8388  switch (ConstraintLetter) {
8389  // The I constraint applies only to simple ADD or SUB immediate operands:
8390  // i.e. 0 to 4095 with optional shift by 12
8391  // The J constraint applies only to ADD or SUB immediates that would be
8392  // valid when negated, i.e. if [an add pattern] were to be output as a SUB
8393  // instruction [or vice versa], in other words -1 to -4095 with optional
8394  // left shift by 12.
8395  case 'I':
8396  if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
8397  break;
8398  return;
8399  case 'J': {
8400  uint64_t NVal = -C->getSExtValue();
8401  if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
8402  CVal = C->getSExtValue();
8403  break;
8404  }
8405  return;
8406  }
8407  // The K and L constraints apply *only* to logical immediates, including
8408  // what used to be the MOVI alias for ORR (though the MOVI alias has now
8409  // been removed and MOV should be used). So these constraints have to
8410  // distinguish between bit patterns that are valid 32-bit or 64-bit
8411  // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
8412  // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
8413  // versa.
8414  case 'K':
8415  if (AArch64_AM::isLogicalImmediate(CVal, 32))
8416  break;
8417  return;
8418  case 'L':
8419  if (AArch64_AM::isLogicalImmediate(CVal, 64))
8420  break;
8421  return;
8422  // The M and N constraints are a superset of K and L respectively, for use
8423  // with the MOV (immediate) alias. As well as the logical immediates they
8424  // also match 32 or 64-bit immediates that can be loaded either using a
8425  // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
8426  // (M) or 64-bit 0x1234000000000000 (N) etc.
8427  // As a note some of this code is liberally stolen from the asm parser.
8428  case 'M': {
8429  if (!isUInt<32>(CVal))
8430  return;
8431  if (AArch64_AM::isLogicalImmediate(CVal, 32))
8432  break;
8433  if ((CVal & 0xFFFF) == CVal)
8434  break;
8435  if ((CVal & 0xFFFF0000ULL) == CVal)
8436  break;
8437  uint64_t NCVal = ~(uint32_t)CVal;
8438  if ((NCVal & 0xFFFFULL) == NCVal)
8439  break;
8440  if ((NCVal & 0xFFFF0000ULL) == NCVal)
8441  break;
8442  return;
8443  }
8444  case 'N': {
8445  if (AArch64_AM::isLogicalImmediate(CVal, 64))
8446  break;
8447  if ((CVal & 0xFFFFULL) == CVal)
8448  break;
8449  if ((CVal & 0xFFFF0000ULL) == CVal)
8450  break;
8451  if ((CVal & 0xFFFF00000000ULL) == CVal)
8452  break;
8453  if ((CVal & 0xFFFF000000000000ULL) == CVal)
8454  break;
8455  uint64_t NCVal = ~CVal;
8456  if ((NCVal & 0xFFFFULL) == NCVal)
8457  break;
8458  if ((NCVal & 0xFFFF0000ULL) == NCVal)
8459  break;
8460  if ((NCVal & 0xFFFF00000000ULL) == NCVal)
8461  break;
8462  if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
8463  break;
8464  return;
8465  }
8466  default:
8467  return;
8468  }
8469 
8470  // All assembler immediates are 64-bit integers.
8471  Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
8472  break;
8473  }
8474 
8475  if (Result.getNode()) {
8476  Ops.push_back(Result);
8477  return;
8478  }
8479 
8480  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8481 }
8482 
8483 //===----------------------------------------------------------------------===//
8484 // AArch64 Advanced SIMD Support
8485 //===----------------------------------------------------------------------===//
8486 
8487 /// WidenVector - Given a value in the V64 register class, produce the
8488 /// equivalent value in the V128 register class.
8489 static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
8490  EVT VT = V64Reg.getValueType();
8491  unsigned NarrowSize = VT.getVectorNumElements();
8492  MVT EltTy = VT.getVectorElementType().getSimpleVT();
8493  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
8494  SDLoc DL(V64Reg);
8495 
8496  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
8497  V64Reg, DAG.getConstant(0, DL, MVT::i64));
8498 }
8499 
8500 /// getExtFactor - Determine the adjustment factor for the position when
8501 /// generating an "extract from vector registers" instruction.
8502 static unsigned getExtFactor(SDValue &V) {
8503  EVT EltType = V.getValueType().getVectorElementType();
8504  return EltType.getSizeInBits() / 8;
8505 }
8506 
8507 /// NarrowVector - Given a value in the V128 register class, produce the
8508 /// equivalent value in the V64 register class.
8509 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
8510  EVT VT = V128Reg.getValueType();
8511  unsigned WideSize = VT.getVectorNumElements();
8512  MVT EltTy = VT.getVectorElementType().getSimpleVT();
8513  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
8514  SDLoc DL(V128Reg);
8515 
8516  return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
8517 }
8518 
8519 // Gather data to see if the operation can be modelled as a
8520 // shuffle in combination with VEXTs.
8521 SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
8522  SelectionDAG &DAG) const {
8523  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8524  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
8525  SDLoc dl(Op);
8526  EVT VT = Op.getValueType();
8527  assert(!VT.isScalableVector() &&
8528  "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
8529  unsigned NumElts = VT.getVectorNumElements();
8530 
8531  struct ShuffleSourceInfo {
8532  SDValue Vec;
8533  unsigned MinElt;
8534  unsigned MaxElt;
8535 
8536  // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8537  // be compatible with the shuffle we intend to construct. As a result
8538  // ShuffleVec will be some sliding window into the original Vec.
8539  SDValue ShuffleVec;
8540 
8541  // Code should guarantee that element i in Vec starts at element "WindowBase
8542  // + i * WindowScale in ShuffleVec".
8543  int WindowBase;
8544  int WindowScale;
8545 
8546  ShuffleSourceInfo(SDValue Vec)
8547  : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
8548  ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
8549 
8550  bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8551  };
8552 
8553  // First gather all vectors used as an immediate source for this BUILD_VECTOR
8554  // node.
8555  SmallVector<ShuffleSourceInfo, 2> Sources;
8556  for (unsigned i = 0; i < NumElts; ++i) {
8557  SDValue V = Op.getOperand(i);
8558  if (V.isUndef())
8559  continue;
8560  else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
8561  !isa<ConstantSDNode>(V.getOperand(1))) {
8562  LLVM_DEBUG(
8563  dbgs() << "Reshuffle failed: "
8564  "a shuffle can only come from building a vector from "
8565  "various elements of other vectors, provided their "
8566  "indices are constant\n");
8567  return SDValue();
8568  }
8569 
8570  // Add this element source to the list if it's not already there.
8571  SDValue SourceVec = V.getOperand(0);
8572  auto Source = find(Sources, SourceVec);
8573  if (Source == Sources.end())
8574  Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8575 
8576  // Update the minimum and maximum lane number seen.
8577  unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
8578  Source->MinElt = std::min(Source->MinElt, EltNo);
8579  Source->MaxElt = std::max(Source->MaxElt, EltNo);
8580  }
8581 
8582  if (Sources.size() > 2) {
8583  LLVM_DEBUG(
8584  dbgs() << "Reshuffle failed: currently only do something sane when at "
8585  "most two source vectors are involved\n");
8586  return SDValue();
8587  }
8588 
8589  // Find out the smallest element size among result and two sources, and use
8590  // it as element size to build the shuffle_vector.
8591  EVT SmallestEltTy = VT.getVectorElementType();
8592  for (auto &Source : Sources) {
8593  EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8594  if (SrcEltTy.bitsLT(SmallestEltTy)) {
8595  SmallestEltTy = SrcEltTy;
8596  }
8597  }
8598  unsigned ResMultiplier =
8599  VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
8600  uint64_t VTSize = VT.getFixedSizeInBits();
8601  NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
8602  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8603 
8604  // If the source vector is too wide or too narrow, we may nevertheless be able
8605  // to construct a compatible shuffle either by concatenating it with UNDEF or
8606  // extracting a suitable range of elements.
8607  for (auto &Src : Sources) {
8608  EVT SrcVT = Src.ShuffleVec.getValueType();
8609 
8610  uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8611  if (SrcVTSize == VTSize)
8612  continue;
8613 
8614  // This stage of the search produces a source with the same element type as
8615  // the original, but with a total width matching the BUILD_VECTOR output.
8616  EVT EltVT = SrcVT.getVectorElementType();
8617  unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8618  EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8619 
8620  if (SrcVTSize < VTSize) {
8621  assert(2 * SrcVTSize == VTSize);
8622  // We can pad out the smaller vector for free, so if it's part of a
8623  // shuffle...
8624  Src.ShuffleVec =
8625  DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8626  DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8627  continue;
8628  }
8629 
8630  if (SrcVTSize != 2 * VTSize) {
8631  LLVM_DEBUG(
8632  dbgs() << "Reshuffle failed: result vector too small to extract\n");
8633  return SDValue();
8634  }
8635 
8636  if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8637  LLVM_DEBUG(
8638  dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
8639  return SDValue();
8640  }
8641 
8642  if (Src.MinElt >= NumSrcElts) {
8643  // The extraction can just take the second half
8644  Src.ShuffleVec =
8645  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8646  DAG.getConstant(NumSrcElts, dl, MVT::i64));
8647  Src.WindowBase = -NumSrcElts;
8648  } else if (Src.MaxElt < NumSrcElts) {
8649  // The extraction can just take the first half
8650  Src.ShuffleVec =
8651  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8652  DAG.getConstant(0, dl, MVT::i64));
8653  } else {
8654  // An actual VEXT is needed
8655  SDValue VEXTSrc1 =
8656  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8657  DAG.getConstant(0, dl, MVT::i64));
8658  SDValue VEXTSrc2 =
8659  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8660  DAG.getConstant(NumSrcElts, dl, MVT::i64));
8661  unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
8662 
8663  if (!SrcVT.is64BitVector()) {
8664  LLVM_DEBUG(
8665  dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
8666  "for SVE vectors.");
8667  return SDValue();
8668  }
8669 
8670  Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
8671  VEXTSrc2,
8672  DAG.getConstant(Imm, dl, MVT::i32));
8673  Src.WindowBase = -Src.MinElt;
8674  }
8675  }
8676 
8677  // Another possible incompatibility occurs from the vector element types. We
8678  // can fix this by bitcasting the source vectors to the same type we intend
8679  // for the shuffle.
8680  for (auto &Src : Sources) {
8681  EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8682  if (SrcEltTy == SmallestEltTy)
8683  continue;
8684  assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8685  Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
8686  Src.WindowScale =
8687  SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
8688  Src.WindowBase *= Src.WindowScale;
8689  }
8690 
8691  // Final sanity check before we try to actually produce a shuffle.
8692  LLVM_DEBUG(for (auto Src
8693  : Sources)
8694  assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8695 
8696  // The stars all align, our next step is to produce the mask for the shuffle.
8697  SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8698  int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8699  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8700  SDValue Entry = Op.getOperand(i);
8701  if (Entry.isUndef())
8702  continue;
8703 
8704  auto Src = find(Sources, Entry.getOperand(0));
8705  int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8706 
8707  // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8708  // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8709  // segment.
8710  EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8711  int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8712  VT.getScalarSizeInBits());
8713  int LanesDefined = BitsDefined / BitsPerShuffleLane;
8714 
8715  // This source is expected to fill ResMultiplier lanes of the final shuffle,
8716  // starting at the appropriate offset.
8717  int *LaneMask = &Mask[i * ResMultiplier];
8718 
8719  int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8720  ExtractBase += NumElts * (Src - Sources.begin());
8721  for (int j = 0; j < LanesDefined; ++j)
8722  LaneMask[j] = ExtractBase + j;
8723  }
8724 
8725  // Final check before we try to produce nonsense...
8726  if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
8727  LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
8728  return SDValue();
8729  }
8730 
8731  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8732  for (unsigned i = 0; i < Sources.size(); ++i)
8733  ShuffleOps[i] = Sources[i].ShuffleVec;
8734 
8735  SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8736  ShuffleOps[1], Mask);
8737  SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
8738 
8739  LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
8740  dbgs() << "Reshuffle, creating node: "; V.dump(););
8741 
8742  return V;
8743 }
8744 
8745 // Check if an EXT instruction can handle the shuffle mask when the
8746 // vector sources of the shuffle are the same.
8747 static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
8748  unsigned NumElts = VT.getVectorNumElements();
8749 
8750  // Assume that the first shuffle index is not UNDEF. Fail if it is.
8751  if (M[0] < 0)
8752  return false;
8753 
8754  Imm = M[0];
8755 
8756  // If this is a VEXT shuffle, the immediate value is the index of the first
8757  // element. The other shuffle indices must be the successive elements after
8758  // the first one.
8759  unsigned ExpectedElt = Imm;
8760  for (unsigned i = 1; i < NumElts; ++i) {
8761  // Increment the expected index. If it wraps around, just follow it
8762  // back to index zero and keep going.
8763  ++ExpectedElt;
8764  if (ExpectedElt == NumElts)
8765  ExpectedElt = 0;
8766 
8767  if (M[i] < 0)
8768  continue; // ignore UNDEF indices
8769  if (ExpectedElt != static_cast<unsigned>(M[i]))
8770  return false;
8771  }
8772 
8773  return true;
8774 }
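// Illustrative: for <8 x i8> the mask <3,4,5,6,7,0,1,2> rotates a single
// source by three lanes, so Imm == 3 here and (after scaling by getExtFactor)
// the shuffle can become EXT Vd.8B, Vn.8B, Vn.8B, #3.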
8775 
8776 /// Check if a vector shuffle corresponds to a DUP instruction with a larger
8777 /// element width than the vector lane type. If that is the case the function
8778 /// returns true and writes the value of the DUP instruction lane operand into
8779 /// DupLaneOp.
8780 static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
8781  unsigned &DupLaneOp) {
8782  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
8783  "Only possible block sizes for wide DUP are: 16, 32, 64");
8784 
8785  if (BlockSize <= VT.getScalarSizeInBits())
8786  return false;
8787  if (BlockSize % VT.getScalarSizeInBits() != 0)
8788  return false;
8789  if (VT.getSizeInBits() % BlockSize != 0)
8790  return false;
8791 
8792  size_t SingleVecNumElements = VT.getVectorNumElements();
8793  size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
8794  size_t NumBlocks = VT.getSizeInBits() / BlockSize;
8795 
8796  // We are looking for masks like
8797  // [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
8798  // might be replaced by 'undefined'. BlockIndices will eventually contain
8799  // lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
8800  // for the above examples)
8801  SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
8802  for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
8803  for (size_t I = 0; I < NumEltsPerBlock; I++) {
8804  int Elt = M[BlockIndex * NumEltsPerBlock + I];
8805  if (Elt < 0)
8806  continue;
8807  // For now we don't support shuffles that use the second operand
8808  if ((unsigned)Elt >= SingleVecNumElements)
8809  return false;
8810  if (BlockElts[I] < 0)
8811  BlockElts[I] = Elt;
8812  else if (BlockElts[I] != Elt)
8813  return false;
8814  }
8815 
8816  // We found a candidate block (possibly with some undefs). It must be a
8817  // sequence of consecutive integers starting with a value divisible by
8818  // NumEltsPerBlock with some values possibly replaced by undef-s.
8819 
8820  // Find first non-undef element
8821  auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
8822  assert(FirstRealEltIter != BlockElts.end() &&
8823  "Shuffle with all-undefs must have been caught by previous cases, "
8824  "e.g. isSplat()");
8825  if (FirstRealEltIter == BlockElts.end()) {
8826  DupLaneOp = 0;
8827  return true;
8828  }
8829 
8830  // Index of FirstRealElt in BlockElts
8831  size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
8832 
8833  if ((unsigned)*FirstRealEltIter < FirstRealIndex)
8834  return false;
8835  // BlockElts[0] must have the following value if it isn't undef:
8836  size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
8837 
8838  // Check the first element
8839  if (Elt0 % NumEltsPerBlock != 0)
8840  return false;
8841  // Check that the sequence indeed consists of consecutive integers (modulo
8842  // undefs)
8843  for (size_t I = 0; I < NumEltsPerBlock; I++)
8844  if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
8845  return false;
8846 
8847  DupLaneOp = Elt0 / NumEltsPerBlock;
8848  return true;
8849 }
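// Illustrative: for v8i16 with BlockSize == 32, the mask <2,3,2,3,2,3,2,3>
// repeats 32-bit block 1 of the source, so DupLaneOp == 1 and the shuffle
// can be emitted as DUP Vd.4S, Vn.S[1] after bitcasting to v4i32.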
8850 
8851 // Check if an EXT instruction can handle the shuffle mask when the
8852 // vector sources of the shuffle are different.
8853 static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
8854  unsigned &Imm) {
8855  // Look for the first non-undef element.
8856  const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
8857 
8858  // Benefit from APInt to handle overflow when calculating the expected element.
8859  unsigned NumElts = VT.getVectorNumElements();
8860  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
8861  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
8862  // The following shuffle indices must be the successive elements after the
8863  // first real element.
8864  const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
8865  [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
8866  if (FirstWrongElt != M.end())
8867  return false;
8868 
8869  // The index of an EXT is the first element if it is not UNDEF.
8870  // Watch out for the beginning UNDEFs. The EXT index should be the expected
8871  // value of the first element. E.g.
8872  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
8873  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
8874  // ExpectedElt is the last mask index plus 1.
8875  Imm = ExpectedElt.getZExtValue();
8876 
8877  // There are two different cases that require reversing the input vectors.
8878  // For example, for vector <4 x i32> we have the following cases,
8879  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
8880  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
8881  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
8882  // to reverse two input vectors.
8883  if (Imm < NumElts)
8884  ReverseEXT = true;
8885  else
8886  Imm -= NumElts;
8887 
8888  return true;
8889 }
8890 
8891 /// isREVMask - Check if a vector shuffle corresponds to a REV
8892 /// instruction with the specified blocksize. (The order of the elements
8893 /// within each block of the vector is reversed.)
8894 static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
8895  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
8896  "Only possible block sizes for REV are: 16, 32, 64");
8897 
8898  unsigned EltSz = VT.getScalarSizeInBits();
8899  if (EltSz == 64)
8900  return false;
8901 
8902  unsigned NumElts = VT.getVectorNumElements();
8903  unsigned BlockElts = M[0] + 1;
8904  // If the first shuffle index is UNDEF, be optimistic.
8905  if (M[0] < 0)
8906  BlockElts = BlockSize / EltSz;
8907 
8908  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
8909  return false;
8910 
8911  for (unsigned i = 0; i < NumElts; ++i) {
8912  if (M[i] < 0)
8913  continue; // ignore UNDEF indices
8914  if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
8915  return false;
8916  }
8917 
8918  return true;
8919 }
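// Worked example (illustrative): for v8i16 with BlockSize == 32, EltSz == 16
// and BlockElts == 2, so the only accepted mask is <1, 0, 3, 2, 5, 4, 7, 6>
// (modulo undefs): each pair of 16-bit elements is swapped within its 32-bit
// block, which is exactly what REV32 produces.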
8920 
8921 static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
8922  unsigned NumElts = VT.getVectorNumElements();
8923  if (NumElts % 2 != 0)
8924  return false;
8925  WhichResult = (M[0] == 0 ? 0 : 1);
8926  unsigned Idx = WhichResult * NumElts / 2;
8927  for (unsigned i = 0; i != NumElts; i += 2) {
8928  if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
8929  (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
8930  return false;
8931  Idx += 1;
8932  }
8933 
8934  return true;
8935 }
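// Worked example (illustrative): for v4i32, <0, 4, 1, 5> interleaves the low
// halves of the two sources (WhichResult == 0, i.e. ZIP1) and <2, 6, 3, 7>
// interleaves the high halves (WhichResult == 1, i.e. ZIP2).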
8936 
8937 static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
8938  unsigned NumElts = VT.getVectorNumElements();
8939  WhichResult = (M[0] == 0 ? 0 : 1);
8940  for (unsigned i = 0; i != NumElts; ++i) {
8941  if (M[i] < 0)
8942  continue; // ignore UNDEF indices
8943  if ((unsigned)M[i] != 2 * i + WhichResult)
8944  return false;
8945  }
8946 
8947  return true;
8948 }
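// Worked example (illustrative): for v4i32, <0, 2, 4, 6> keeps the
// even-numbered elements of the two concatenated sources (WhichResult == 0,
// i.e. UZP1) and <1, 3, 5, 7> keeps the odd-numbered ones (UZP2).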
8949 
8950 static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
8951  unsigned NumElts = VT.getVectorNumElements();
8952  if (NumElts % 2 != 0)
8953  return false;
8954  WhichResult = (M[0] == 0 ? 0 : 1);
8955  for (unsigned i = 0; i < NumElts; i += 2) {
8956  if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
8957  (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
8958  return false;
8959  }
8960  return true;
8961 }
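// Worked example (illustrative): for v4i32, <0, 4, 2, 6> transposes the even
// lanes of the two sources (WhichResult == 0, i.e. TRN1) and <1, 5, 3, 7>
// transposes the odd lanes (TRN2).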
8962 
8963 /// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
8964 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
8965 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
8966 static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
8967  unsigned NumElts = VT.getVectorNumElements();
8968  if (NumElts % 2 != 0)
8969  return false;
8970  WhichResult = (M[0] == 0 ? 0 : 1);
8971  unsigned Idx = WhichResult * NumElts / 2;
8972  for (unsigned i = 0; i != NumElts; i += 2) {
8973  if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
8974  (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
8975  return false;
8976  Idx += 1;
8977  }
8978 
8979  return true;
8980 }
8981 
8982 /// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
8983 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
8984 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
8985 static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
8986  unsigned Half = VT.getVectorNumElements() / 2;
8987  WhichResult = (M[0] == 0 ? 0 : 1);
8988  for (unsigned j = 0; j != 2; ++j) {
8989  unsigned Idx = WhichResult;
8990  for (unsigned i = 0; i != Half; ++i) {
8991  int MIdx = M[i + j * Half];
8992  if (MIdx >= 0 && (unsigned)MIdx != Idx)
8993  return false;
8994  Idx += 2;
8995  }
8996  }
8997 
8998  return true;
8999 }
9000 
9001 /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
9002 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
9003 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
9004 static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9005  unsigned NumElts = VT.getVectorNumElements();
9006  if (NumElts % 2 != 0)
9007  return false;
9008  WhichResult = (M[0] == 0 ? 0 : 1);
9009  for (unsigned i = 0; i < NumElts; i += 2) {
9010  if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
9011  (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
9012  return false;
9013  }
9014  return true;
9015 }
9016 
9017 static bool isINSMask(ArrayRef<int> M, int NumInputElements,
9018  bool &DstIsLeft, int &Anomaly) {
9019  if (M.size() != static_cast<size_t>(NumInputElements))
9020  return false;
9021 
9022  int NumLHSMatch = 0, NumRHSMatch = 0;
9023  int LastLHSMismatch = -1, LastRHSMismatch = -1;
9024 
9025  for (int i = 0; i < NumInputElements; ++i) {
9026  if (M[i] == -1) {
9027  ++NumLHSMatch;
9028  ++NumRHSMatch;
9029  continue;
9030  }
9031 
9032  if (M[i] == i)
9033  ++NumLHSMatch;
9034  else
9035  LastLHSMismatch = i;
9036 
9037  if (M[i] == i + NumInputElements)
9038  ++NumRHSMatch;
9039  else
9040  LastRHSMismatch = i;
9041  }
9042 
9043  if (NumLHSMatch == NumInputElements - 1) {
9044  DstIsLeft = true;
9045  Anomaly = LastLHSMismatch;
9046  return true;
9047  } else if (NumRHSMatch == NumInputElements - 1) {
9048  DstIsLeft = false;
9049  Anomaly = LastRHSMismatch;
9050  return true;
9051  }
9052 
9053  return false;
9054 }
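// Worked example (illustrative): for v4i32, the mask <0, 1, 6, 3> matches the
// LHS everywhere except lane 2, so DstIsLeft == true and Anomaly == 2; the
// caller then inserts element 6 - NumElts == 2 of V2 into lane 2 of V1, which
// selection turns into a single INS.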
9055 
9056 static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
9057  if (VT.getSizeInBits() != 128)
9058  return false;
9059 
9060  unsigned NumElts = VT.getVectorNumElements();
9061 
9062  for (int I = 0, E = NumElts / 2; I != E; I++) {
9063  if (Mask[I] != I)
9064  return false;
9065  }
9066 
9067  int Offset = NumElts / 2;
9068  for (int I = NumElts / 2, E = NumElts; I != E; I++) {
9069  if (Mask[I] != I + SplitLHS * Offset)
9070  return false;
9071  }
9072 
9073  return true;
9074 }
9075 
9076 static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
9077  SDLoc DL(Op);
9078  EVT VT = Op.getValueType();
9079  SDValue V0 = Op.getOperand(0);
9080  SDValue V1 = Op.getOperand(1);
9081  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
9082 
9083  if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
9084  VT.getVectorElementType() != V1.getValueType().getVectorElementType())
9085  return SDValue();
9086 
9087  bool SplitV0 = V0.getValueSizeInBits() == 128;
9088 
9089  if (!isConcatMask(Mask, VT, SplitV0))
9090  return SDValue();
9091 
9092  EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
9093  if (SplitV0) {
9094  V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
9095  DAG.getConstant(0, DL, MVT::i64));
9096  }
9097  if (V1.getValueSizeInBits() == 128) {
9098  V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
9099  DAG.getConstant(0, DL, MVT::i64));
9100  }
9101  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
9102 }
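// Worked example (illustrative): a v4i32 shuffle with mask <0, 1, 4, 5>
// passes isConcatMask, so the low v2i32 halves of both operands are extracted
// and rejoined with CONCAT_VECTORS instead of a generic shuffle.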
9103 
9104 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9105 /// the specified operations to build the shuffle.
9106 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9107  SDValue RHS, SelectionDAG &DAG,
9108  const SDLoc &dl) {
9109  unsigned OpNum = (PFEntry >> 26) & 0x0F;
9110  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
9111  unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
9112 
9113  enum {
9114  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9115  OP_VREV,
9116  OP_VDUP0,
9117  OP_VDUP1,
9118  OP_VDUP2,
9119  OP_VDUP3,
9120  OP_VEXT1,
9121  OP_VEXT2,
9122  OP_VEXT3,
9123  OP_VUZPL, // VUZP, left result
9124  OP_VUZPR, // VUZP, right result
9125  OP_VZIPL, // VZIP, left result
9126  OP_VZIPR, // VZIP, right result
9127  OP_VTRNL, // VTRN, left result
9128  OP_VTRNR // VTRN, right result
9129  };
9130 
9131  if (OpNum == OP_COPY) {
9132  if (LHSID == (1 * 9 + 2) * 9 + 3)
9133  return LHS;
9134  assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
9135  return RHS;
9136  }
9137 
9138  SDValue OpLHS, OpRHS;
9139  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9140  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9141  EVT VT = OpLHS.getValueType();
9142 
9143  switch (OpNum) {
9144  default:
9145  llvm_unreachable("Unknown shuffle opcode!");
9146  case OP_VREV:
9147  // VREV divides the vector in half and swaps within the half.
9148  if (VT.getVectorElementType() == MVT::i32 ||
9149  VT.getVectorElementType() == MVT::f32)
9150  return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
9151  // vrev <4 x i16> -> REV32
9152  if (VT.getVectorElementType() == MVT::i16 ||
9153  VT.getVectorElementType() == MVT::f16 ||
9154  VT.getVectorElementType() == MVT::bf16)
9155  return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
9156  // vrev <4 x i8> -> REV16
9157  assert(VT.getVectorElementType() == MVT::i8);
9158  return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
9159  case OP_VDUP0:
9160  case OP_VDUP1:
9161  case OP_VDUP2:
9162  case OP_VDUP3: {
9163  EVT EltTy = VT.getVectorElementType();
9164  unsigned Opcode;
9165  if (EltTy == MVT::i8)
9166  Opcode = AArch64ISD::DUPLANE8;
9167  else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
9168  Opcode = AArch64ISD::DUPLANE16;
9169  else if (EltTy == MVT::i32 || EltTy == MVT::f32)
9170  Opcode = AArch64ISD::DUPLANE32;
9171  else if (EltTy == MVT::i64 || EltTy == MVT::f64)
9172  Opcode = AArch64ISD::DUPLANE64;
9173  else
9174  llvm_unreachable("Invalid vector element type?");
9175 
9176  if (VT.getSizeInBits() == 64)
9177  OpLHS = WidenVector(OpLHS, DAG);
9178  SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
9179  return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
9180  }
9181  case OP_VEXT1:
9182  case OP_VEXT2:
9183  case OP_VEXT3: {
9184  unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
9185  return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
9186  DAG.getConstant(Imm, dl, MVT::i32));
9187  }
9188  case OP_VUZPL:
9189  return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
9190  OpRHS);
9191  case OP_VUZPR:
9192  return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
9193  OpRHS);
9194  case OP_VZIPL:
9195  return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
9196  OpRHS);
9197  case OP_VZIPR:
9198  return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
9199  OpRHS);
9200  case OP_VTRNL:
9201  return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
9202  OpRHS);
9203  case OP_VTRNR:
9204  return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
9205  OpRHS);
9206  }
9207 }
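// For reference (summary of the decoding above): a perfect-shuffle table
// entry packs the cost into bits [31:30], the operation into bits [29:26],
// and the two sub-shuffle IDs into bits [25:13] and [12:0]. Each ID encodes
// four mask indices in base 9 (0-7 for a lane, 8 for undef), so the identity
// LHS <0, 1, 2, 3> is (1*9+2)*9+3 == 102, the value tested in OP_COPY.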
9208 
9209 static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
9210  SelectionDAG &DAG) {
9211  // Check to see if we can use the TBL instruction.
9212  SDValue V1 = Op.getOperand(0);
9213  SDValue V2 = Op.getOperand(1);
9214  SDLoc DL(Op);
9215 
9216  EVT EltVT = Op.getValueType().getVectorElementType();
9217  unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
9218 
9219  SmallVector<SDValue, 8> TBLMask;
9220  for (int Val : ShuffleMask) {
9221  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
9222  unsigned Offset = Byte + Val * BytesPerElt;
9223  TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
9224  }
9225  }
9226 
9227  MVT IndexVT = MVT::v8i8;
9228  unsigned IndexLen = 8;
9229  if (Op.getValueSizeInBits() == 128) {
9230  IndexVT = MVT::v16i8;
9231  IndexLen = 16;
9232  }
9233 
9234  SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
9235  SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
9236 
9237  SDValue Shuffle;
9238  if (V2.getNode()->isUndef()) {
9239  if (IndexLen == 8)
9240  V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
9241  Shuffle = DAG.getNode(
9242  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
9243  DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
9244  DAG.getBuildVector(IndexVT, DL,
9245  makeArrayRef(TBLMask.data(), IndexLen)));
9246  } else {
9247  if (IndexLen == 8) {
9248  V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
9249  Shuffle = DAG.getNode(
9250  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
9251  DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
9252  DAG.getBuildVector(IndexVT, DL,
9253  makeArrayRef(TBLMask.data(), IndexLen)));
9254  } else {
9255  // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
9256  // cannot currently represent the register constraints on the input
9257  // table registers.
9258  // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
9259  // DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
9260  // IndexLen));
9261  Shuffle = DAG.getNode(
9262  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
9263  DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
9264  V2Cst, DAG.getBuildVector(IndexVT, DL,
9265  makeArrayRef(TBLMask.data(), IndexLen)));
9266  }
9267  }
9268  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
9269 }
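// Worked example (illustrative): a v4i16 shuffle mask <1, 0, 3, 2> expands to
// the byte-level TBL mask <2, 3, 0, 1, 6, 7, 4, 5>, since each mask element
// contributes BytesPerElt == 2 consecutive byte offsets starting at
// Val * BytesPerElt.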
9270 
9271 static unsigned getDUPLANEOp(EVT EltType) {
9272  if (EltType == MVT::i8)
9273  return AArch64ISD::DUPLANE8;
9274  if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
9275  return AArch64ISD::DUPLANE16;
9276  if (EltType == MVT::i32 || EltType == MVT::f32)
9277  return AArch64ISD::DUPLANE32;
9278  if (EltType == MVT::i64 || EltType == MVT::f64)
9279  return AArch64ISD::DUPLANE64;
9280 
9281  llvm_unreachable("Invalid vector element type?");
9282 }
9283 
9284 static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
9285  unsigned Opcode, SelectionDAG &DAG) {
9286  // Try to eliminate a bitcasted extract subvector before a DUPLANE.
9287  auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
9288  // Match: dup (bitcast (extract_subv X, C)), LaneC
9289  if (BitCast.getOpcode() != ISD::BITCAST ||
9290  BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
9291  return false;
9292 
9293  // The extract index must align in the destination type. That may not
9294  // happen if the bitcast is from narrow to wide type.
9295  SDValue Extract = BitCast.getOperand(0);
9296  unsigned ExtIdx = Extract.getConstantOperandVal(1);
9297  unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
9298  unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
9299  unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
9300  if (ExtIdxInBits % CastedEltBitWidth != 0)
9301  return false;
9302 
9303  // Update the lane value by offsetting with the scaled extract index.
9304  LaneC += ExtIdxInBits / CastedEltBitWidth;
9305 
9306  // Determine the casted vector type of the wide vector input.
9307  // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
9308  // Examples:
9309  // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
9310  // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
9311  unsigned SrcVecNumElts =
9312  Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
9313  CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
9314  SrcVecNumElts);
9315  return true;
9316  };
9317  MVT CastVT;
9318  if (getScaledOffsetDup(V, Lane, CastVT)) {
9319  V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
9320  } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9321  // The lane is incremented by the index of the extract.
9322  // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
9323  Lane += V.getConstantOperandVal(1);
9324  V = V.getOperand(0);
9325  } else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
9326  // The lane is decremented if we are splatting from the 2nd operand.
9327  // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
9328  unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
9329  Lane -= Idx * VT.getVectorNumElements() / 2;
9330  V = WidenVector(V.getOperand(Idx), DAG);
9331  } else if (VT.getSizeInBits() == 64) {
9332  // Widen the operand to 128-bit register with undef.
9333  V = WidenVector(V, DAG);
9334  }
9335  return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
9336 }
9337 
9338 SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9339  SelectionDAG &DAG) const {
9340  SDLoc dl(Op);
9341  EVT VT = Op.getValueType();
9342 
9343  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
9344 
9345  if (useSVEForFixedLengthVectorVT(VT))
9346  return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
9347 
9348  // Convert shuffles that are directly supported on NEON to target-specific
9349  // DAG nodes, instead of keeping them as shuffles and matching them again
9350  // during code selection. This is more efficient and avoids the possibility
9351  // of inconsistencies between legalization and selection.
9352  ArrayRef<int> ShuffleMask = SVN->getMask();
9353 
9354  SDValue V1 = Op.getOperand(0);
9355  SDValue V2 = Op.getOperand(1);
9356 
9357  assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!");
9358  assert(ShuffleMask.size() == VT.getVectorNumElements() &&
9359  "Unexpected VECTOR_SHUFFLE mask size!");
9360 
9361  if (SVN->isSplat()) {
9362  int Lane = SVN->getSplatIndex();
9363  // If this is undef splat, generate it via "just" vdup, if possible.
9364  if (Lane == -1)
9365  Lane = 0;
9366 
9367  if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
9368  return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
9369  V1.getOperand(0));
9370  // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
9371  // constant. If so, we can just reference the lane's definition directly.
9372  if (V1.getOpcode() == ISD::BUILD_VECTOR &&
9373  !isa<ConstantSDNode>(V1.getOperand(Lane)))
9374  return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
9375 
9376  // Otherwise, duplicate from the lane of the input vector.
9377  unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
9378  return constructDup(V1, Lane, dl, VT, Opcode, DAG);
9379  }
9380 
9381  // Check if the mask matches a DUP for a wider element
9382  for (unsigned LaneSize : {64U, 32U, 16U}) {
9383  unsigned Lane = 0;
9384  if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
9385  unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
9386  : LaneSize == 32 ? AArch64ISD::DUPLANE32
9387  : AArch64ISD::DUPLANE16;
9388  // Cast V1 to an integer vector with required lane size
9389  MVT NewEltTy = MVT::getIntegerVT(LaneSize);
9390  unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
9391  MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
9392  V1 = DAG.getBitcast(NewVecTy, V1);
9393  // Construct the DUP instruction
9394  V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
9395  // Cast back to the original type
9396  return DAG.getBitcast(VT, V1);
9397  }
9398  }
9399 
9400  if (isREVMask(ShuffleMask, VT, 64))
9401  return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
9402  if (isREVMask(ShuffleMask, VT, 32))
9403  return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
9404  if (isREVMask(ShuffleMask, VT, 16))
9405  return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
9406 
9407  if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
9408  (VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
9409  ShuffleVectorInst::isReverseMask(ShuffleMask)) {
9410  SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
9411  return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
9412  DAG.getConstant(8, dl, MVT::i32));
9413  }
9414 
9415  bool ReverseEXT = false;
9416  unsigned Imm;
9417  if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
9418  if (ReverseEXT)
9419  std::swap(V1, V2);
9420  Imm *= getExtFactor(V1);
9421  return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
9422  DAG.getConstant(Imm, dl, MVT::i32));
9423  } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
9424  Imm *= getExtFactor(V1);
9425  return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
9426  DAG.getConstant(Imm, dl, MVT::i32));
9427  }
9428 
9429  unsigned WhichResult;
9430  if (isZIPMask(ShuffleMask, VT, WhichResult)) {
9431  unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
9432  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
9433  }
9434  if (isUZPMask(ShuffleMask, VT, WhichResult)) {
9435  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
9436  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
9437  }
9438  if (isTRNMask(ShuffleMask, VT, WhichResult)) {
9439  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
9440  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
9441  }
9442 
9443  if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
9444  unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
9445  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
9446  }
9447  if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
9448  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
9449  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
9450  }
9451  if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
9452  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
9453  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
9454  }
9455 
9456  if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
9457  return Concat;
9458 
9459  bool DstIsLeft;
9460  int Anomaly;
9461  int NumInputElements = V1.getValueType().getVectorNumElements();
9462  if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
9463  SDValue DstVec = DstIsLeft ? V1 : V2;
9464  SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
9465 
9466  SDValue SrcVec = V1;
9467  int SrcLane = ShuffleMask[Anomaly];
9468  if (SrcLane >= NumInputElements) {
9469  SrcVec = V2;
9470  SrcLane -= VT.getVectorNumElements();
9471  }
9472  SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
9473 
9474  EVT ScalarVT = VT.getVectorElementType();
9475 
9476  if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
9477  ScalarVT = MVT::i32;
9478 
9479  return DAG.getNode(
9480  ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
9481  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
9482  DstLaneV);
9483  }
9484 
9485  // If the shuffle is not directly supported and it has 4 elements, use
9486  // the PerfectShuffle-generated table to synthesize it from other shuffles.
9487  unsigned NumElts = VT.getVectorNumElements();
9488  if (NumElts == 4) {
9489  unsigned PFIndexes[4];
9490  for (unsigned i = 0; i != 4; ++i) {
9491  if (ShuffleMask[i] < 0)
9492  PFIndexes[i] = 8;
9493  else
9494  PFIndexes[i] = ShuffleMask[i];
9495  }
9496 
9497  // Compute the index in the perfect shuffle table.
9498  unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
9499  PFIndexes[2] * 9 + PFIndexes[3];
9500  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9501  unsigned Cost = (PFEntry >> 30);
9502 
9503  if (Cost <= 4)
9504  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9505  }
9506 
9507  return GenerateTBL(Op, ShuffleMask, DAG);
9508 }
9509 
9510 SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
9511  SelectionDAG &DAG) const {
9512  SDLoc dl(Op);
9513  EVT VT = Op.getValueType();
9514  EVT ElemVT = VT.getScalarType();
9515  SDValue SplatVal = Op.getOperand(0);
9516 
9517  if (useSVEForFixedLengthVectorVT(VT))
9518  return LowerToScalableOp(Op, DAG);
9519 
9520  // Extend input splat value where needed to fit into a GPR (32b or 64b only)
9521  // FPRs don't have this restriction.
9522  switch (ElemVT.getSimpleVT().SimpleTy) {
9523  case MVT::i1: {
9524  // The only legal i1 vectors are SVE vectors, so we can use SVE-specific
9525  // lowering code.
9526  if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
9527  if (ConstVal->isOne())
9528  return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
9529  // TODO: Add special case for constant false
9530  }
9531  // The general case of i1. There isn't any natural way to do this,
9532  // so we use some trickery with whilelo.
9533  SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
9534  SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
9535  DAG.getValueType(MVT::i1));
9536  SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl,
9537  MVT::i64);
9538  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
9539  DAG.getConstant(0, dl, MVT::i64), SplatVal);
9540  }
9541  case MVT::i8:
9542  case MVT::i16:
9543  case MVT::i32:
9544  SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
9545  break;
9546  case MVT::i64:
9547  SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
9548  break;
9549  case MVT::f16:
9550  case MVT::bf16:
9551  case MVT::f32:
9552  case MVT::f64:
9553  // Fine as is
9554  break;
9555  default:
9556  report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
9557  }
9558 
9559  return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
9560 }
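// Note on the MVT::i1 case above (illustrative): after the SIGN_EXTEND_INREG
// from bit 0, SplatVal is 0 when splatting false and all-ones (UINT64_MAX)
// when splatting true, so whilelo(0, SplatVal) produces an all-false or
// all-true predicate respectively, which is exactly the requested splat.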
9561 
9562 SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
9563  SelectionDAG &DAG) const {
9564  SDLoc DL(Op);
9565 
9566  EVT VT = Op.getValueType();
9567  if (!isTypeLegal(VT) || !VT.isScalableVector())
9568  return SDValue();
9569 
9570  // Current lowering only supports the SVE-ACLE types.
9571  if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
9572  return SDValue();
9573 
9574  // The DUPQ operation is independent of the element type, so normalise to i64s.
9575  SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
9576  SDValue Idx128 = Op.getOperand(2);
9577 
9578  // DUPQ can be used when idx is in range.
9579  auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
9580  if (CIdx && (CIdx->getZExtValue() <= 3)) {
9581  SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
9582  SDNode *DUPQ =
9583  DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI);
9584  return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0));
9585  }
9586 
9587  // The ACLE says this must produce the same result as:
9588  // svtbl(data, svadd_x(svptrue_b64(),
9589  // svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
9590  // index * 2))
9591  SDValue One = DAG.getConstant(1, DL, MVT::i64);
9592  SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
9593 
9594  // create the vector 0,1,0,1,...
9595  SDValue SV = DAG.getStepVector(DL, MVT::nxv2i64);
9596  SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
9597 
9598  // create the vector idx64,idx64+1,idx64,idx64+1,...
9599  SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
9600  SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
9601  SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);
9602 
9603  // create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
9604  SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
9605  return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
9606 }
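// Worked example for the TBL fallback above (illustrative): for Idx128 == 1,
// Idx64 == 2 and ShuffleMask is <2, 3, 2, 3, ...>, so the TBL copies 64-bit
// elements 2 and 3 (the second 128-bit quadword) into every quadword of the
// result, matching the ACLE formula quoted above.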
9607 
9608 
9609 static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
9610  APInt &UndefBits) {
9611  EVT VT = BVN->getValueType(0);
9612  APInt SplatBits, SplatUndef;
9613  unsigned SplatBitSize;
9614  bool HasAnyUndefs;
9615  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
9616  unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
9617 
9618  for (unsigned i = 0; i < NumSplats; ++i) {
9619  CnstBits <<= SplatBitSize;
9620  UndefBits <<= SplatBitSize;
9621  CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
9622  UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
9623  }
9624 
9625  return true;
9626  }
9627 
9628  return false;
9629 }
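// Roughly speaking, on success CnstBits holds the splat pattern with every
// undefined bit taken as 0, while UndefBits holds the same defined bits with
// every undefined bit taken as 1; since undef lanes may be given any value,
// callers (e.g. ConstantBuildVector below) try to encode both patterns.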
9630 
9631 // Try 64-bit splatted SIMD immediate.
9632 static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9633  const APInt &Bits) {
9634  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9635  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9636  EVT VT = Op.getValueType();
9637  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
9638 
9639  if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
9640  Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
9641 
9642  SDLoc dl(Op);
9643  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9644  DAG.getConstant(Value, dl, MVT::i32));
9645  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9646  }
9647  }
9648 
9649  return SDValue();
9650 }
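// For reference: ModImm type 10, tested above, is the 64-bit form in which
// every byte of the value is either 0x00 or 0xff (e.g. 0xff00ff00ff00ff00),
// which MOVI encodes as an 8-bit mask with one bit per byte.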
9651 
9652 // Try 32-bit splatted SIMD immediate.
9653 static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9654  const APInt &Bits,
9655  const SDValue *LHS = nullptr) {
9656  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9657  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9658  EVT VT = Op.getValueType();
9659  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
9660  bool isAdvSIMDModImm = false;
9661  uint64_t Shift;
9662 
9663  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
9664  Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
9665  Shift = 0;
9666  }
9667  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
9668  Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
9669  Shift = 8;
9670  }
9671  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
9672  Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
9673  Shift = 16;
9674  }
9675  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
9676  Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
9677  Shift = 24;
9678  }
9679 
9680  if (isAdvSIMDModImm) {
9681  SDLoc dl(Op);
9682  SDValue Mov;
9683 
9684  if (LHS)
9685  Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
9686  DAG.getConstant(Value, dl, MVT::i32),
9687  DAG.getConstant(Shift, dl, MVT::i32));
9688  else
9689  Mov = DAG.getNode(NewOp, dl, MovTy,
9690  DAG.getConstant(Value, dl, MVT::i32),
9691  DAG.getConstant(Shift, dl, MVT::i32));
9692 
9693  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9694  }
9695  }
9696 
9697  return SDValue();
9698 }
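// Worked example (illustrative): a splat of 0x00ab0000 in every 32-bit lane
// matches ModImm type 3 (a single non-zero byte at bit position 16), giving
// Value == 0xab and Shift == 16, i.e. MOVI Vd.4s, #0xab, LSL #16.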
9699 
9700 // Try 16-bit splatted SIMD immediate.
9701 static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9702  const APInt &Bits,
9703  const SDValue *LHS = nullptr) {
9704  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9705  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9706  EVT VT = Op.getValueType();
9707  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
9708  bool isAdvSIMDModImm = false;
9709  uint64_t Shift;
9710 
9711  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
9712  Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
9713  Shift = 0;
9714  }
9715  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
9716  Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
9717  Shift = 8;
9718  }
9719 
9720  if (isAdvSIMDModImm) {
9721  SDLoc dl(Op);
9722  SDValue Mov;
9723 
9724  if (LHS)
9725  Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
9726  DAG.getConstant(Value, dl, MVT::i32),
9727  DAG.getConstant(Shift, dl, MVT::i32));
9728  else
9729  Mov = DAG.getNode(NewOp, dl, MovTy,
9730  DAG.getConstant(Value, dl, MVT::i32),
9731  DAG.getConstant(Shift, dl, MVT::i32));
9732 
9733  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9734  }
9735  }
9736 
9737  return SDValue();
9738 }
9739 
9740 // Try 32-bit splatted SIMD immediate with shifted ones.
9741 static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
9742  SelectionDAG &DAG, const APInt &Bits) {
9743  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9744  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9745  EVT VT = Op.getValueType();
9746  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
9747  bool isAdvSIMDModImm = false;
9748  uint64_t Shift;
9749 
9750  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
9751  Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
9752  Shift = 264;
9753  }
9754  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
9755  Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
9756  Shift = 272;
9757  }
9758 
9759  if (isAdvSIMDModImm) {
9760  SDLoc dl(Op);
9761  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9762  DAG.getConstant(Value, dl, MVT::i32),
9763  DAG.getConstant(Shift, dl, MVT::i32));
9764  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9765  }
9766  }
9767 
9768  return SDValue();
9769 }
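// The unusual shift amounts above spell the MSL ("shift ones in") forms:
// 264 (== 256 + 8) selects MSL #8 for type 7 (splats like 0x0000nnff) and
// 272 (== 256 + 16) selects MSL #16 for type 8 (splats like 0x00nnffff).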
9770 
9771 // Try 8-bit splatted SIMD immediate.
9772 static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9773  const APInt &Bits) {
9774  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9775  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9776  EVT VT = Op.getValueType();
9777  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
9778 
9779  if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
9780  Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
9781 
9782  SDLoc dl(Op);
9783  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9784  DAG.getConstant(Value, dl, MVT::i32));
9785  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9786  }
9787  }
9788 
9789  return SDValue();
9790 }
9791 
9792 // Try FP splatted SIMD immediate.
9793 static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9794  const APInt &Bits) {
9795  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9796  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9797  EVT VT = Op.getValueType();
9798  bool isWide = (VT.getSizeInBits() == 128);
9799  MVT MovTy;
9800  bool isAdvSIMDModImm = false;
9801 
9802  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
9803  Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
9804  MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
9805  }
9806  else if (isWide &&
9807  (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
9808  Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
9809  MovTy = MVT::v2f64;
9810  }
9811 
9812  if (isAdvSIMDModImm) {
9813  SDLoc dl(Op);
9814  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9815  DAG.getConstant(Value, dl, MVT::i32));
9816  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9817  }
9818  }
9819 
9820  return SDValue();
9821 }
9822 
9823 // Specialized code to quickly find if PotentialBVec is a BuildVector that
9824 // consists only of the same constant int value, which is returned in the
9825 // reference argument ConstVal.
9826 static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
9827  uint64_t &ConstVal) {
9828  BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
9829  if (!Bvec)
9830  return false;
9831  ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
9832  if (!FirstElt)
9833  return false;
9834  EVT VT = Bvec->getValueType(0);
9835  unsigned NumElts = VT.getVectorNumElements();
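  // The DAG uniques ConstantSDNodes, so lanes holding the same constant value
  // are pointer-equal; comparing the dyn_cast result against FirstElt below is
  // therefore sufficient.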
9836  for (unsigned i = 1; i < NumElts; ++i)
9837  if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
9838  return false;
9839  ConstVal = FirstElt->getZExtValue();
9840  return true;
9841 }
9842 
9843 static unsigned getIntrinsicID(const SDNode *N) {
9844  unsigned Opcode = N->getOpcode();
9845  switch (Opcode) {
9846  default:
9847  return Intrinsic::not_intrinsic;
9848  case ISD::INTRINSIC_WO_CHAIN: {
9849  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
9850  if (IID < Intrinsic::num_intrinsics)
9851  return IID;
9852  return Intrinsic::not_intrinsic;
9853  }
9854  }
9855 }
9856 
9857 // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
9858 // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
9859 // BUILD_VECTORs with constant element C1, C2 is a constant, and:
9860 // - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
9861 // - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
9862 // The (or (lsl Y, C2), (and X, BvecC1)) case is also handled.
9863 static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
9864  EVT VT = N->getValueType(0);
9865 
9866  if (!VT.isVector())
9867  return SDValue();
9868 
9869  SDLoc DL(N);
9870 
9871  SDValue And;
9872  SDValue Shift;
9873 
9874  SDValue FirstOp = N->getOperand(0);
9875  unsigned FirstOpc = FirstOp.getOpcode();
9876  SDValue SecondOp = N->getOperand(1);
9877  unsigned SecondOpc = SecondOp.getOpcode();
9878 
9879  // Is one of the operands an AND or a BICi? The AND may have been optimised to
9880  // a BICi in order to use an immediate instead of a register.
9881  // Is the other operand a shl or lshr? This will have been turned into:
9882  // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
9883  if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
9884  (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
9885  And = FirstOp;
9886  Shift = SecondOp;
9887 
9888  } else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
9889  (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
9890  And = SecondOp;
9891  Shift = FirstOp;
9892  } else
9893  return SDValue();
9894 
9895  bool IsAnd = And.getOpcode() == ISD::AND;
9896  bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
9897 
9898  // Is the shift amount constant?
9899  ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
9900  if (!C2node)
9901  return SDValue();
9902 
9903  uint64_t C1;
9904  if (IsAnd) {
9905  // Is the and mask vector all constant?
9906  if (!isAllConstantBuildVector(And.getOperand(1), C1))
9907  return SDValue();
9908  } else {
9909  // Reconstruct the corresponding AND immediate from the two BICi immediates.
9910  ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
9911  ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2));
9912  assert(C1nodeImm && C1nodeShift);
9913  C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue());
9914  }
9915 
9916  // Is C1 == ~(Ones(ElemSizeInBits) << C2) or
9917  // C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
9918  // how much one can shift elements of a particular size?
9919  uint64_t C2 = C2node->getZExtValue();
9920  unsigned ElemSizeInBits = VT.getScalarSizeInBits();
9921  if (C2 > ElemSizeInBits)
9922  return SDValue();
9923 
9924  APInt C1AsAPInt(ElemSizeInBits, C1);
9925  APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2)
9926  : APInt::getLowBitsSet(ElemSizeInBits, C2);
9927  if (C1AsAPInt != RequiredC1)
9928  return SDValue();
9929 
9930  SDValue X = And.getOperand(0);
9931  SDValue Y = Shift.getOperand(0);
9932 
9933  unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
9934  SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
9935 
9936  LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
9937  LLVM_DEBUG(N->dump(&DAG));
9938  LLVM_DEBUG(dbgs() << "into: \n");
9939  LLVM_DEBUG(ResultSLI->dump(&DAG));
9940 
9941  ++NumShiftInserts;
9942  return ResultSLI;
9943 }
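// Worked example (illustrative): for v4i32 with C2 == 24, the pattern
// (or (and X, 0x00ffffff), (AArch64ISD::VSHL Y, 24)) satisfies
// C1 == ~(Ones(32) << 24), so it is rewritten to (VSLI X, Y, 24): the low 24
// bits of each lane come from X and the high 8 bits from Y shifted into place.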
9944 
9945 SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
9946  SelectionDAG &DAG) const {
9947  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
9948  return LowerToScalableOp(Op, DAG);
9949 
9950  // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
9951  if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
9952  return Res;
9953 
9954  EVT VT = Op.getValueType();
9955 
9956  SDValue LHS = Op.getOperand(0);
9957  BuildVectorSDNode *BVN =
9958  dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
9959  if (!BVN) {
9960  // OR commutes, so try swapping the operands.
9961  LHS = Op.getOperand(1);
9962  BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
9963  }
9964  if (!BVN)
9965  return Op;
9966 
9967  APInt DefBits(VT.getSizeInBits(), 0);
9968  APInt UndefBits(VT.getSizeInBits(), 0);
9969  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
9970  SDValue NewOp;
9971 
9972  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
9973  DefBits, &LHS)) ||
9974  (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
9975  DefBits, &LHS)))
9976  return NewOp;
9977 
9978  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
9979  UndefBits, &LHS)) ||
9980  (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
9981  UndefBits, &LHS)))
9982  return NewOp;
9983  }
9984 
9985  // We can always fall back to a non-immediate OR.
9986  return Op;
9987 }
9988 
9989 // Normalize the operands of BUILD_VECTOR. The value of constant operands will
9990  // be truncated to fit the element width.
9991 static SDValue NormalizeBuildVector(SDValue Op,
9992  SelectionDAG &DAG) {
9993  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
9994  SDLoc dl(Op);
9995  EVT VT = Op.getValueType();
9996  EVT EltTy= VT.getVectorElementType();
9997 
9998  if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
9999  return Op;
10000 
10001  SmallVector<SDValue, 16> Ops;
10002  for (SDValue Lane : Op->ops()) {
10003  // For integer vectors, type legalization would have promoted the
10004  // operands already. Otherwise, if Op is a floating-point splat
10005  // (with operands cast to integers), then the only possibilities
10006  // are constants and UNDEFs.
10007  if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
10008  APInt LowBits(EltTy.getSizeInBits(),
10009  CstLane->getZExtValue());
10010  Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
10011  } else if (Lane.getNode()->isUndef()) {
10012  Lane = DAG.getUNDEF(MVT::i32);
10013  } else {
10014  assert(Lane.getValueType() == MVT::i32 &&
10015  "Unexpected BUILD_VECTOR operand type");
10016  }
10017  Ops.push_back(Lane);
10018  }
10019  return DAG.getBuildVector(VT, dl, Ops);
10020 }
10021 
10022 static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
10023  EVT VT = Op.getValueType();
10024 
10025  APInt DefBits(VT.getSizeInBits(), 0);
10026  APInt UndefBits(VT.getSizeInBits(), 0);
10027  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
10028  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
10029  SDValue NewOp;
10030  if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
10031  (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10032  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
10033  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10034  (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
10035  (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
10036  return NewOp;
10037 
10038  DefBits = ~DefBits;
10039  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
10040  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
10041  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
10042  return NewOp;
10043 
10044  DefBits = UndefBits;
10045  if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
10046  (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10047  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
10048  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10049  (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
10050  (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
10051  return NewOp;
10052 
10053  DefBits = ~UndefBits;
10054  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
10055  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
10056  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
10057  return NewOp;
10058  }
10059 
10060  return SDValue();
10061 }
10062 
10063 SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
10064  SelectionDAG &DAG) const {
10065  EVT VT = Op.getValueType();
10066 
10067  // Try to build a simple constant vector.
10068  Op = NormalizeBuildVector(Op, DAG);
10069  if (VT.isInteger()) {
10070  // Certain vector constants, used to express things like logical NOT and
10071  // arithmetic NEG, are passed through unmodified. This allows special
10072  // patterns for these operations to match, which will lower these constants
10073  // to whatever is proven necessary.
10074  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
10075  if (BVN->isConstant())
10076  if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
10077  unsigned BitSize = VT.getVectorElementType().getSizeInBits();
10078  APInt Val(BitSize,
10079  Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
10080  if (Val.isNullValue() || Val.isAllOnesValue())
10081  return Op;
10082  }
10083  }
10084 
10085  if (SDValue V = ConstantBuildVector(Op, DAG))
10086  return V;
10087 
10088  // Scan through the operands to find some interesting properties we can
10089  // exploit:
10090  // 1) If only one value is used, we can use a DUP, or
10091  // 2) if only the low element is not undef, we can just insert that, or
10092  // 3) if only one constant value is used (w/ some non-constant lanes),
10093  // we can splat the constant value into the whole vector then fill
10094  // in the non-constant lanes.
10095  // 4) FIXME: If different constant values are used, but we can intelligently
10096  // select the values we'll be overwriting for the non-constant
10097  // lanes such that we can directly materialize the vector
10098  // some other way (MOVI, e.g.), we can be sneaky.
10099  // 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
10100  SDLoc dl(Op);
10101  unsigned NumElts = VT.getVectorNumElements();
10102  bool isOnlyLowElement = true;
10103  bool usesOnlyOneValue = true;
10104  bool usesOnlyOneConstantValue = true;
10105  bool isConstant = true;
10106  bool AllLanesExtractElt = true;
10107  unsigned NumConstantLanes = 0;
10108  unsigned NumDifferentLanes = 0;
10109  unsigned NumUndefLanes = 0;
10110  SDValue Value;
10111  SDValue ConstantValue;
10112  for (unsigned i = 0; i < NumElts; ++i) {
10113  SDValue V = Op.getOperand(i);
10114  if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
10115  AllLanesExtractElt = false;
10116  if (V.isUndef()) {
10117  ++NumUndefLanes;
10118  continue;
10119  }
10120  if (i > 0)
10121  isOnlyLowElement = false;
10122  if (!isIntOrFPConstant(V))
10123  isConstant = false;
10124 
10125  if (isIntOrFPConstant(V)) {
10126  ++NumConstantLanes;
10127  if (!ConstantValue.getNode())
10128  ConstantValue = V;
10129  else if (ConstantValue != V)
10130  usesOnlyOneConstantValue = false;
10131  }
10132 
10133  if (!Value.getNode())
10134  Value = V;
10135  else if (V != Value) {
10136  usesOnlyOneValue = false;
10137  ++NumDifferentLanes;
10138  }
10139  }
10140 
10141  if (!Value.getNode()) {
10142  LLVM_DEBUG(
10143  dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
10144  return DAG.getUNDEF(VT);
10145  }
10146 
10147  // Convert BUILD_VECTOR where all elements but the lowest are undef into
10148  // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
10149  // as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
10150  if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
10151  LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
10152  "SCALAR_TO_VECTOR node\n");
10153  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
10154  }
10155 
10156  if (AllLanesExtractElt) {
10157  SDNode *Vector = nullptr;
10158  bool Even = false;
10159  bool Odd = false;
10160  // Check whether the extract elements match the Even pattern <0,2,4,...> or
10161  // the Odd pattern <1,3,5,...>.
10162  for (unsigned i = 0; i < NumElts; ++i) {
10163  SDValue V = Op.getOperand(i);
10164  const SDNode *N = V.getNode();
10165  if (!isa<ConstantSDNode>(N->getOperand(1)))
10166  break;
10167  SDValue N0 = N->getOperand(0);
10168 
10169  // All elements are extracted from the same vector.
10170  if (!Vector) {
10171  Vector = N0.getNode();
10172  // Check that the type of EXTRACT_VECTOR_ELT matches the type of
10173  // BUILD_VECTOR.
10174  if (VT.getVectorElementType() !=
10175  N0.getValueType().getVectorElementType())
10176  break;
10177  } else if (Vector != N0.getNode()) {
10178  Odd = false;
10179  Even = false;
10180  break;
10181  }
10182 
10183  // Extracted values are either at Even indices <0,2,4,...> or at Odd
10184  // indices <1,3,5,...>.
10185  uint64_t Val = N->getConstantOperandVal(1);
10186  if (Val == 2 * i) {
10187  Even = true;
10188  continue;
10189  }
10190  if (Val - 1 == 2 * i) {
10191  Odd = true;
10192  continue;
10193  }
10194 
10195  // Something does not match: abort.
10196  Odd = false;
10197  Even = false;
10198  break;
10199  }
10200  if (Even || Odd) {
10201  SDValue LHS =
10202  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
10203  DAG.getConstant(0, dl, MVT::i64));
10204  SDValue RHS =
10205  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
10206  DAG.getConstant(NumElts, dl, MVT::i64));
10207 
10208  if (Even && !Odd)
10209  return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
10210  RHS);
10211  if (Odd && !Even)
10212  return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
10213  RHS);
10214  }
10215  }
10216 
10217  // Use DUP for non-constant splats. For f32 constant splats, reduce to
10218  // i32 and try again.
10219  if (usesOnlyOneValue) {
10220  if (!isConstant) {
10221  if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
10222  Value.getValueType() != VT) {
10223  LLVM_DEBUG(
10224  dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
10225  return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
10226  }
10227 
10228  // This is actually a DUPLANExx operation, which keeps everything vectory.
10229 
10230  SDValue Lane = Value.getOperand(1);
10231  Value = Value.getOperand(0);
10232  if (Value.getValueSizeInBits() == 64) {
10233  LLVM_DEBUG(
10234  dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
10235  "widening it\n");
10236  Value = WidenVector(Value, DAG);
10237  }
10238 
10239  unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
10240  return DAG.getNode(Opcode, dl, VT, Value, Lane);
10241  }
10242 
10243  if (VT.getVectorElementType().isFloatingPoint()) {
10244  SmallVector<SDValue, 8> Ops;
10245  EVT EltTy = VT.getVectorElementType();
10246  assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
10247  EltTy == MVT::f64) && "Unsupported floating-point vector type");
10248  LLVM_DEBUG(
10249  dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
10250  "BITCASTS, and try again\n");
10251  MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
10252  for (unsigned i = 0; i < NumElts; ++i)
10253  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
10254  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
10255  SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
10256  LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
10257  Val.dump(););
10258  Val = LowerBUILD_VECTOR(Val, DAG);
10259  if (Val.getNode())
10260  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
10261  }
10262  }
10263 
10264  // If we need to insert a small number of different non-constant elements and
10265  // the vector width is sufficiently large, prefer using DUP with the common
10266  // value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
10267  // skip the constant lane handling below.
10268  bool PreferDUPAndInsert =
10269  !isConstant && NumDifferentLanes >= 1 &&
10270  NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
10271  NumDifferentLanes >= NumConstantLanes;
10272 
10273  // If there was only one constant value used and for more than one lane,
10274  // start by splatting that value, then replace the non-constant lanes. This
10275  // is better than the default, which will perform a separate initialization
10276  // for each lane.
10277  if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
10278  // Firstly, try to materialize the splat constant.
10279  SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
10280  Val = ConstantBuildVector(Vec, DAG);
10281  if (!Val) {
10282  // Otherwise, materialize the constant and splat it.
10283  Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
10284  DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
10285  }
10286 
10287  // Now insert the non-constant lanes.
10288  for (unsigned i = 0; i < NumElts; ++i) {
10289  SDValue V = Op.getOperand(i);
10290  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
10291  if (!isIntOrFPConstant(V))
10292  // Note that type legalization likely mucked about with the VT of the
10293  // source operand, so we may have to convert it here before inserting.
10294  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
10295  }
10296  return Val;
10297  }
10298 
10299  // This will generate a load from the constant pool.
10300  if (isConstant) {
10301  LLVM_DEBUG(
10302  dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
10303  "expansion\n");
10304  return SDValue();
10305  }
10306 
10307  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
10308  if (NumElts >= 4) {
10309  if (SDValue shuffle = ReconstructShuffle(Op, DAG))
10310  return shuffle;
10311  }
10312 
10313  if (PreferDUPAndInsert) {
10314  // First, build a constant vector with the common element.
10315  SmallVector<SDValue, 8> Ops(NumElts, Value);
10316  SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
10317  // Next, insert the elements that do not match the common value.
10318  for (unsigned I = 0; I < NumElts; ++I)
10319  if (Op.getOperand(I) != Value)
10320  NewVector =
10321  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
10322  Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
10323 
10324  return NewVector;
10325  }
10326 
10327  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
10328  // know the default expansion would otherwise fall back on something even
10329  // worse. For a vector with one or two non-undef values, that's
10330  // scalar_to_vector for the elements followed by a shuffle (provided the
10331  // shuffle is valid for the target) and materialization element by element
10332  // on the stack followed by a load for everything else.
10333  if (!isConstant && !usesOnlyOneValue) {
10334  LLVM_DEBUG(
10335  dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
10336  "of INSERT_VECTOR_ELT\n");
10337 
10338  SDValue Vec = DAG.getUNDEF(VT);
10339  SDValue Op0 = Op.getOperand(0);
10340  unsigned i = 0;
10341 
10342  // Use SCALAR_TO_VECTOR for lane zero to
10343  // a) Avoid a RMW dependency on the full vector register, and
10344  // b) Allow the register coalescer to fold away the copy if the
10345  // value is already in an S or D register, and we're forced to emit an
10346  // INSERT_SUBREG that we can't fold anywhere.
10347  //
10348  // We also allow types like i8 and i16 which are illegal scalar but legal
10349  // vector element types. After type-legalization the inserted value is
10350  // extended (i32) and it is safe to cast them to the vector type by ignoring
10351  // the upper bits of the lowest lane (e.g. v8i8, v4i16).
10352  if (!Op0.isUndef()) {
10353  LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
10354  Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
10355  ++i;
10356  }
10357  LLVM_DEBUG(if (i < NumElts) dbgs()
10358  << "Creating nodes for the other vector elements:\n";);
10359  for (; i < NumElts; ++i) {
10360  SDValue V = Op.getOperand(i);
10361  if (V.isUndef())
10362  continue;
10363  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
10364  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
10365  }
10366  return Vec;
10367  }
10368 
10369  LLVM_DEBUG(
10370  dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
10371  "better alternative\n");
10372  return SDValue();
10373 }
10374 
10375 SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
10376  SelectionDAG &DAG) const {
10377  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
10378  return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
10379 
10380  assert(Op.getValueType().isScalableVector() &&
10381  isTypeLegal(Op.getValueType()) &&
10382  "Expected legal scalable vector type!");
10383 
10384  if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
10385  return Op;
10386 
10387  return SDValue();
10388 }
10389 
10390 SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10391  SelectionDAG &DAG) const {
10392  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
10393 
10394  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
10395  return LowerFixedLengthInsertVectorElt(Op, DAG);
10396 
10397  // Check for non-constant or out of range lane.
10398  EVT VT = Op.getOperand(0).getValueType();
10399 
10400  if (VT.getScalarType() == MVT::i1) {
10401  EVT VectorVT = getPromotedVTForPredicate(VT);
10402  SDLoc DL(Op);
10403  SDValue ExtendedVector =
10404  DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VectorVT);
10405  SDValue ExtendedValue =
10406  DAG.getAnyExtOrTrunc(Op.getOperand(1), DL,
10407  VectorVT.getScalarType().getSizeInBits() < 32
10408  ? MVT::i32
10409  : VectorVT.getScalarType());
10410  ExtendedVector =
10411  DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, ExtendedVector,
10412  ExtendedValue, Op.getOperand(2));
10413  return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT);
10414  }
10415 
10416  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10417  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
10418  return SDValue();
10419 
10420  // Insertion/extraction are legal for V128 types.
10421  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10422  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
10423  VT == MVT::v8f16 || VT == MVT::v8bf16)
10424  return Op;
10425 
10426  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
10427  VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
10428  VT != MVT::v4bf16)
10429  return SDValue();
10430 
10431  // For V64 types, we perform insertion by expanding the value
10432  // to a V128 type and performing the insertion on that.
10433  SDLoc DL(Op);
10434  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
10435  EVT WideTy = WideVec.getValueType();
10436 
10437  SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
10438  Op.getOperand(1), Op.getOperand(2));
10439  // Re-narrow the resultant vector.
10440  return NarrowVector(Node, DAG);
10441 }
10442 
10443 SDValue
10444 AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
10445  SelectionDAG &DAG) const {
10446  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
10447  EVT VT = Op.getOperand(0).getValueType();
10448 
10449  if (VT.getScalarType() == MVT::i1) {
10450  // We can't directly extract from an SVE predicate; extend it first.
10451  // (This isn't the only possible lowering, but it's straightforward.)
10452  EVT VectorVT = getPromotedVTForPredicate(VT);
10453  SDLoc DL(Op);
10454  SDValue Extend =
10455  DAG.getNode(ISD::ANY_EXTEND, DL, VectorVT, Op.getOperand(0));
10456  MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
10457  SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy,
10458  Extend, Op.getOperand(1));
10459  return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
10460  }
10461 
10462  if (useSVEForFixedLengthVectorVT(VT))
10463  return LowerFixedLengthExtractVectorElt(Op, DAG);
10464 
10465  // Check for non-constant or out of range lane.
10466  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
10467  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
10468  return SDValue();
10469 
10470  // Insertion/extraction are legal for V128 types.
10471  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10472  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
10473  VT == MVT::v8f16 || VT == MVT::v8bf16)
10474  return Op;
10475 
10476  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
10477  VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
10478  VT != MVT::v4bf16)
10479  return SDValue();
10480 
10481  // For V64 types, we perform extraction by expanding the value
10482  // to a V128 type and perform the extraction on that.
10483  SDLoc DL(Op);
10484  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
10485  EVT WideTy = WideVec.getValueType();
10486 
10487  EVT ExtrTy = WideTy.getVectorElementType();
10488  if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
10489  ExtrTy = MVT::i32;
10490 
10491  // For extractions, we just return the result directly.
10492  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
10493  Op.getOperand(1));
10494 }
10495 
10496 SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
10497  SelectionDAG &DAG) const {
10498  assert(Op.getValueType().isFixedLengthVector() &&
10499  "Only cases that extract a fixed length vector are supported!");
10500 
10501  EVT InVT = Op.getOperand(0).getValueType();
10502  unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
10503  unsigned Size = Op.getValueSizeInBits();
10504 
10505  if (InVT.isScalableVector()) {
10506  // This will be matched by custom code during ISelDAGToDAG.
10507  if (Idx == 0 && isPackedVectorType(InVT, DAG))
10508  return Op;
10509 
10510  return SDValue();
10511  }
10512 
10513  // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
10514  if (Idx == 0 && InVT.getSizeInBits() <= 128)
10515  return Op;
10516 
10517  // If this is extracting the upper 64-bits of a 128-bit vector, we match
10518  // that directly.
10519  if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
10520  InVT.getSizeInBits() == 128)
10521  return Op;
10522 
10523  return SDValue();
10524 }
10525 
10526 SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
10527  SelectionDAG &DAG) const {
10528  assert(Op.getValueType().isScalableVector() &&
10529  "Only expect to lower inserts into scalable vectors!");
10530 
10531  EVT InVT = Op.getOperand(1).getValueType();
10532  unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
10533 
10534  if (InVT.isScalableVector()) {
10535  SDLoc DL(Op);
10536  EVT VT = Op.getValueType();
10537 
10538  if (!isTypeLegal(VT) || !VT.isInteger())
10539  return SDValue();
10540 
10541  SDValue Vec0 = Op.getOperand(0);
10542  SDValue Vec1 = Op.getOperand(1);
10543 
10544  // Ensure the subvector is half the size of the main vector.
10545  if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
10546  return SDValue();
10547 
10548  // Extend elements of smaller vector...
10549  EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
10550  SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
10551 
10552  if (Idx == 0) {
10553  SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
10554  return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
10555  } else if (Idx == InVT.getVectorMinNumElements()) {
10556  SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
10557  return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
10558  }
10559 
10560  return SDValue();
10561  }
10562 
10563  // This will be matched by custom code during ISelDAGToDAG.
10564  if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
10565  return Op;
10566 
10567  return SDValue();
10568 }
10569 
10570 SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
10571  EVT VT = Op.getValueType();
10572 
10573  if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
10574  return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
10575 
10576  assert(VT.isScalableVector() && "Expected a scalable vector.");
10577 
10578  bool Signed = Op.getOpcode() == ISD::SDIV;
10579  unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
10580 
10581  if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
10582  return LowerToPredicatedOp(Op, DAG, PredOpcode);
10583 
10584  // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
10585  // operations, and truncate the result.
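  // For example, an SDIV of nxv16i8 is unpacked into two SDIVs of nxv8i16
  // (each of which is lowered the same way again via nxv4i32), and the two
  // narrowed halves are recombined with UZP1.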
10586  EVT WidenedVT;
10587  if (VT == MVT::nxv16i8)
10588  WidenedVT = MVT::nxv8i16;
10589  else if (VT == MVT::nxv8i16)
10590  WidenedVT = MVT::nxv4i32;
10591  else
10592  llvm_unreachable("Unexpected Custom DIV operation");
10593 
10594  SDLoc dl(Op);
10595  unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
10596  unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
10597  SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
10598  SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
10599  SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
10600  SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
10601  SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
10602  SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
10603  return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
10604 }
10605 
10606 bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
10607  // Currently no fixed length shuffles that require SVE are legal.
10608  if (useSVEForFixedLengthVectorVT(VT))
10609  return false;
10610 
10611  if (VT.getVectorNumElements() == 4 &&
10612  (VT.is128BitVector() || VT.is64BitVector())) {
10613  unsigned PFIndexes[4];
10614  for (unsigned i = 0; i != 4; ++i) {
10615  if (M[i] < 0)
10616  PFIndexes[i] = 8;
10617  else
10618  PFIndexes[i] = M[i];
10619  }
10620 
10621  // Compute the index in the perfect shuffle table.
10622  unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10623  PFIndexes[2] * 9 + PFIndexes[3];
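    // For example, the mask <1,0,3,2> gives PFIndexes {1,0,3,2} and a table
    // index of 1*729 + 0*81 + 3*9 + 2 = 758; undef lanes encode as 8.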
10624  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10625  unsigned Cost = (PFEntry >> 30);
10626 
10627  if (Cost <= 4)
10628  return true;
10629  }
10630 
10631  bool DummyBool;
10632  int DummyInt;
10633  unsigned DummyUnsigned;
10634 
10635  return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
10636  isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
10637  isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
10638  // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
10639  isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
10640  isZIPMask(M, VT, DummyUnsigned) ||
10641  isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
10642  isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
10643  isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
10644  isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
10645  isConcatMask(M, VT, VT.getSizeInBits() == 128));
10646 }
10647 
10648 /// getVShiftImm - Check if this is a valid build_vector for the immediate
10649 /// operand of a vector shift operation, where all the elements of the
10650 /// build_vector must have the same constant integer value.
10651 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
10652  // Ignore bit_converts.
10653  while (Op.getOpcode() == ISD::BITCAST)
10654  Op = Op.getOperand(0);
10655  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
10656  APInt SplatBits, SplatUndef;
10657  unsigned SplatBitSize;
10658  bool HasAnyUndefs;
10659  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
10660  HasAnyUndefs, ElementBits) ||
10661  SplatBitSize > ElementBits)
10662  return false;
10663  Cnt = SplatBits.getSExtValue();
10664  return true;
10665 }
10666 
10667 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
10668 /// operand of a vector shift left operation. That value must be in the range:
10669 /// 0 <= Value < ElementBits for a left shift; or
10670 /// 0 <= Value <= ElementBits for a long left shift.
10671 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
10672  assert(VT.isVector() && "vector shift count is not a vector type");
10673  int64_t ElementBits = VT.getScalarSizeInBits();
10674  if (!getVShiftImm(Op, ElementBits, Cnt))
10675  return false;
10676  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
10677 }
10678 
10679 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
10680 /// operand of a vector shift right operation. The value must be in the range:
10681 /// 1 <= Value <= ElementBits, or 1 <= Value <= ElementBits/2 if narrowing.
10682 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
10683  assert(VT.isVector() && "vector shift count is not a vector type");
10684  int64_t ElementBits = VT.getScalarSizeInBits();
10685  if (!getVShiftImm(Op, ElementBits, Cnt))
10686  return false;
10687  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
10688 }
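// For example, with v8i16 (ElementBits == 16) a left-shift immediate must lie
// in [0,15] ([0,16] for a long shift), and a right-shift immediate must lie
// in [1,16] ([1,8] for a narrowing shift).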
10689 
10690 SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
10691  SelectionDAG &DAG) const {
10692  EVT VT = Op.getValueType();
10693 
10694  if (VT.getScalarType() == MVT::i1) {
10695  // Lower i1 truncate to `(x & 1) != 0`.
10696  SDLoc dl(Op);
10697  EVT OpVT = Op.getOperand(0).getValueType();
10698  SDValue Zero = DAG.getConstant(0, dl, OpVT);
10699  SDValue One = DAG.getConstant(1, dl, OpVT);
10700  SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
10701  return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
10702  }
10703 
10704  if (!VT.isVector() || VT.isScalableVector())
10705  return SDValue();
10706 
10707  if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
10708  return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
10709 
10710  return SDValue();
10711 }
10712 
10713 SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
10714  SelectionDAG &DAG) const {
10715  EVT VT = Op.getValueType();
10716  SDLoc DL(Op);
10717  int64_t Cnt;
10718 
10719  if (!Op.getOperand(1).getValueType().isVector())
10720  return Op;
10721  unsigned EltSize = VT.getScalarSizeInBits();
10722 
10723  switch (Op.getOpcode()) {
10724  default:
10725  llvm_unreachable("unexpected shift opcode");
10726 
10727  case ISD::SHL:
10728  if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
10729  return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
10730 
10731  if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
10732  return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
10733  DAG.getConstant(Cnt, DL, MVT::i32));
10734  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
10735  DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
10736  MVT::i32),
10737  Op.getOperand(0), Op.getOperand(1));
10738  case ISD::SRA:
10739  case ISD::SRL:
10740  if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
10741  unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
10742                                                 : AArch64ISD::SRL_PRED;
10743  return LowerToPredicatedOp(Op, DAG, Opc);
10744  }
10745 
10746  // Right shift immediate
10747  if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
10748  unsigned Opc =
10749  (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
10750  return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
10751  DAG.getConstant(Cnt, DL, MVT::i32));
10752  }
10753 
10754  // Right shift register. Note, there is not a shift right register
10755  // instruction, but the shift left register instruction takes a signed
10756  // value, where negative numbers specify a right shift.
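    // For example, (srl x, y) becomes aarch64_neon_ushl(x, (0 - y)) and
    // (sra x, y) becomes aarch64_neon_sshl(x, (0 - y)).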
10757  unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
10758  : Intrinsic::aarch64_neon_ushl;
10759  // negate the shift amount
10760  SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
10761  Op.getOperand(1));
10762  SDValue NegShiftLeft =
10763  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
10764  DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
10765  NegShift);
10766  return NegShiftLeft;
10767  }
10768 
10769  return SDValue();
10770 }
10771 
10772 static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
10773  AArch64CC::CondCode CC, bool NoNans, EVT VT,
10774  const SDLoc &dl, SelectionDAG &DAG) {
10775  EVT SrcVT = LHS.getValueType();
10776  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
10777  "function only supposed to emit natural comparisons");
10778 
10779  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10780  APInt CnstBits(VT.getSizeInBits(), 0);
10781  APInt UndefBits(VT.getSizeInBits(), 0);
10782  bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
10783  bool IsZero = IsCnst && (CnstBits == 0);
10784 
10785  if (SrcVT.getVectorElementType().isFloatingPoint()) {
10786  switch (CC) {
10787  default:
10788  return SDValue();
10789  case AArch64CC::NE: {
10790  SDValue Fcmeq;
10791  if (IsZero)
10792  Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
10793  else
10794  Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
10795  return DAG.getNOT(dl, Fcmeq, VT);
10796  }
10797  case AArch64CC::EQ:
10798  if (IsZero)
10799  return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
10800  return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
10801  case AArch64CC::GE:
10802  if (IsZero)
10803  return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
10804  return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
10805  case AArch64CC::GT:
10806  if (IsZero)
10807  return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
10808  return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
10809  case AArch64CC::LS:
10810  if (IsZero)
10811  return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
10812  return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
10813  case AArch64CC::LT:
10814  if (!NoNans)
10815  return SDValue();
10816  // If we ignore NaNs then we can use the MI implementation.
10817  LLVM_FALLTHROUGH;
10818  case AArch64CC::MI:
10819  if (IsZero)
10820  return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
10821  return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
10822  }
10823  }
10824 
10825  switch (CC) {
10826  default:
10827  return SDValue();
10828  case AArch64CC::NE: {
10829  SDValue Cmeq;
10830  if (IsZero)
10831  Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
10832  else
10833  Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
10834  return DAG.getNOT(dl, Cmeq, VT);
10835  }
10836  case AArch64CC::EQ:
10837  if (IsZero)
10838  return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
10839  return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
10840  case AArch64CC::GE:
10841  if (IsZero)
10842  return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
10843  return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
10844  case AArch64CC::GT:
10845  if (IsZero)
10846  return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
10847  return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
10848  case AArch64CC::LE:
10849  if (IsZero)
10850  return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
10851  return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
10852  case AArch64CC::LS:
10853  return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
10854  case AArch64CC::LO:
10855  return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
10856  case AArch64CC::LT:
10857  if (IsZero)
10858  return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
10859  return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
10860  case AArch64CC::HI:
10861  return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
10862  case AArch64CC::HS:
10863  return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
10864  }
10865 }
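// Note that the "less than" style conditions above are emitted as the
// corresponding "greater than" comparison with the operands commuted, e.g.
// LO becomes CMHI(RHS, LHS); the LE/LT/LS encodings are used directly only
// when comparing against zero (the *z node variants).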
10866 
10867 SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
10868  SelectionDAG &DAG) const {
10869  if (Op.getValueType().isScalableVector())
10870  return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
10871 
10872  if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
10873  return LowerFixedLengthVectorSetccToSVE(Op, DAG);
10874 
10875  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10876  SDValue LHS = Op.getOperand(0);
10877  SDValue RHS = Op.getOperand(1);
10878  EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
10879  SDLoc dl(Op);
10880 
10881  if (LHS.getValueType().getVectorElementType().isInteger()) {
10882  assert(LHS.getValueType() == RHS.getValueType());
10883  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
10884  SDValue Cmp =
10885  EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
10886  return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
10887  }
10888 
10889  const bool FullFP16 =
10890  static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
10891 
10892  // Make v4f16 (only) fcmp operations utilise vector instructions
10893  // v8f16 support will be a little more complicated.
10894  if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
10895  if (LHS.getValueType().getVectorNumElements() == 4) {
10896  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
10897  RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
10898  SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
10899  DAG.ReplaceAllUsesWith(Op, NewSetcc);
10900  CmpVT = MVT::v4i32;
10901  } else
10902  return SDValue();
10903  }
10904 
10905  assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
10906  LHS.getValueType().getVectorElementType() != MVT::bf16);
10907 
10908  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
10909  // clean. Some of them require two branches to implement.
10910  AArch64CC::CondCode CC1, CC2;
10911  bool ShouldInvert;
10912  changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
10913 
10914  bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
10915  SDValue Cmp =
10916  EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
10917  if (!Cmp.getNode())
10918  return SDValue();
10919 
10920  if (CC2 != AArch64CC::AL) {
10921  SDValue Cmp2 =
10922  EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
10923  if (!Cmp2.getNode())
10924  return SDValue();
10925 
10926  Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
10927  }
10928 
10929  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
10930 
10931  if (ShouldInvert)
10932  Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
10933 
10934  return Cmp;
10935 }
10936 
10937 static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
10938  SelectionDAG &DAG) {
10939  SDValue VecOp = ScalarOp.getOperand(0);
10940  auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
10941  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
10942  DAG.getConstant(0, DL, MVT::i64));
10943 }
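// The across-vector reduction nodes used below (UADDV, SMAXV, and friends)
// leave their scalar result in lane 0 of a vector register, which is why
// the helper above extracts element 0 rather than using the node directly.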
10944 
10945 SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
10946  SelectionDAG &DAG) const {
10947  SDValue Src = Op.getOperand(0);
10948 
10949  // Try to lower fixed length reductions to SVE.
10950  EVT SrcVT = Src.getValueType();
10951  bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
10952  Op.getOpcode() == ISD::VECREDUCE_OR ||
10953  Op.getOpcode() == ISD::VECREDUCE_XOR ||
10954  Op.getOpcode() == ISD::VECREDUCE_FADD ||
10955  (Op.getOpcode() != ISD::VECREDUCE_ADD &&
10956  SrcVT.getVectorElementType() == MVT::i64);
10957  if (SrcVT.isScalableVector() ||
10958  useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
10959 
10960  if (SrcVT.getVectorElementType() == MVT::i1)
10961  return LowerPredReductionToSVE(Op, DAG);
10962 
10963  switch (Op.getOpcode()) {
10964  case ISD::VECREDUCE_ADD:
10965  return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
10966  case ISD::VECREDUCE_AND:
10967  return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
10968  case ISD::VECREDUCE_OR:
10969  return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
10970  case ISD::VECREDUCE_SMAX:
10971  return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
10972  case ISD::VECREDUCE_SMIN:
10973  return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
10974  case ISD::VECREDUCE_UMAX:
10975  return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
10976  case ISD::VECREDUCE_UMIN:
10977  return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
10978  case ISD::VECREDUCE_XOR:
10979  return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
10980  case ISD::VECREDUCE_FADD:
10981  return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
10982  case ISD::VECREDUCE_FMAX:
10983  return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
10984  case ISD::VECREDUCE_FMIN:
10985  return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
10986  default:
10987  llvm_unreachable("Unhandled fixed length reduction");
10988  }
10989  }
10990 
10991  // Lower NEON reductions.
10992  SDLoc dl(Op);
10993  switch (Op.getOpcode()) {
10994  case ISD::VECREDUCE_ADD:
10995  return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
10996  case ISD::VECREDUCE_SMAX:
10997  return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
10998  case ISD::VECREDUCE_SMIN:
10999  return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
11000  case ISD::VECREDUCE_UMAX:
11001  return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
11002  case ISD::VECREDUCE_UMIN:
11003  return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
11004  case ISD::VECREDUCE_FMAX: {
11005  return DAG.getNode(
11006  ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
11007  DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
11008  Src);
11009  }
11010  case ISD::VECREDUCE_FMIN: {
11011  return DAG.getNode(
11012  ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
11013  DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
11014  Src);
11015  }
11016  default:
11017  llvm_unreachable("Unhandled reduction");
11018  }
11019 }
11020 
11021 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
11022  SelectionDAG &DAG) const {
11023  auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
11024  if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
11025  return SDValue();
11026 
11027  // LSE has an atomic load-add instruction, but not a load-sub.
11028  SDLoc dl(Op);
11029  MVT VT = Op.getSimpleValueType();
11030  SDValue RHS = Op.getOperand(2);
11031  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
11032  RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
11033  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
11034  Op.getOperand(0), Op.getOperand(1), RHS,
11035  AN->getMemOperand());
11036 }
11037 
11038 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
11039  SelectionDAG &DAG) const {
11040  auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
11041  if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
11042  return SDValue();
11043 
11044  // LSE has an atomic load-clear instruction, but not a load-and.
11045  SDLoc dl(Op);
11046  MVT VT = Op.getSimpleValueType();
11047  SDValue RHS = Op.getOperand(2);
11048  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
11049  RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
11050  return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
11051  Op.getOperand(0), Op.getOperand(1), RHS,
11052  AN->getMemOperand());
11053 }
11054 
11055 SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
11056  SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
11057  SDLoc dl(Op);
11058  EVT PtrVT = getPointerTy(DAG.getDataLayout());
11059  SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
11060 
11061  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
11062  const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
11063  if (Subtarget->hasCustomCallingConv())
11064  TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
11065 
11066  Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
11067  DAG.getConstant(4, dl, MVT::i64));
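  // X15 carries the allocation size across the __chkstk call in units of 16
  // bytes, hence the shift right by 4 here and the matching shift left below
  // once the call has returned.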
11068  Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
11069  Chain =
11070  DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
11071  Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
11072  DAG.getRegisterMask(Mask), Chain.getValue(1));
11073  // To match the actual intent better, we should read the output from X15 here
11074  // again (instead of potentially spilling it to the stack), but rereading Size
11075  // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
11076  // here.
11077 
11078  Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
11079  DAG.getConstant(4, dl, MVT::i64));
11080  return Chain;
11081 }
11082 
11083 SDValue
11084 AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
11085  SelectionDAG &DAG) const {
11086  assert(Subtarget->isTargetWindows() &&
11087  "Only Windows alloca probing supported");
11088  SDLoc dl(Op);
11089  // Get the inputs.
11090  SDNode *Node = Op.getNode();
11091  SDValue Chain = Op.getOperand(0);
11092  SDValue Size = Op.getOperand(1);
11093  MaybeAlign Align =
11094  cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
11095  EVT VT = Node->getValueType(0);
11096 
11097  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
11098  "no-stack-arg-probe")) {
11099  SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
11100  Chain = SP.getValue(1);
11101  SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
11102  if (Align)
11103  SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11104  DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
11105  Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
11106  SDValue Ops[2] = {SP, Chain};
11107  return DAG.getMergeValues(Ops, dl);
11108  }
11109 
11110  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
11111 
11112  Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
11113 
11114  SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
11115  Chain = SP.getValue(1);
11116  SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
11117  if (Align)
11118  SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11119  DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
11120  Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
11121 
11122  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
11123  DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
11124 
11125  SDValue Ops[2] = {SP, Chain};
11126  return DAG.getMergeValues(Ops, dl);
11127 }
11128 
11129 SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
11130  SelectionDAG &DAG) const {
11131  EVT VT = Op.getValueType();
11132  assert(VT != MVT::i64 && "Expected illegal VSCALE node");
11133 
11134  SDLoc DL(Op);
11135  APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
11136  return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sextOrSelf(64)),
11137  DL, VT);
11138 }
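// For example, an illegal i32 VSCALE with multiplier 4 is rebuilt as an i64
// vscale node scaled by 4 and then truncated back to i32.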
11139 
11140 /// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
11141 template <unsigned NumVecs>
11142 static bool
11143 setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
11144  AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
11145  Info.opc = ISD::INTRINSIC_VOID;
11146  // Retrieve EC from first vector argument.
11147  const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
11148  const ElementCount EC = VT.getVectorElementCount();
11149 #ifndef NDEBUG
11150  // Check the assumption that all input vectors are the same type.
11151  for (unsigned I = 0; I < NumVecs; ++I)
11152  assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
11153  "Invalid type.");
11154 #endif
11155  // memVT is `NumVecs * VT`.
11156  Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
11157  EC * NumVecs);
11158  Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1);
11159  Info.offset = 0;
11160  Info.align.reset();
11161  Info.flags = MachineMemOperand::MOStore;
11162  return true;
11163 }
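// For example, an aarch64_sve_st2 of two nxv4i32 registers records a memVT
// of nxv8i32, covering the full footprint of both stored vectors.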
11164 
11165 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
11166 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
11167 /// specified in the intrinsic calls.
11168 bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
11169  const CallInst &I,
11170  MachineFunction &MF,
11171  unsigned Intrinsic) const {
11172  auto &DL = I.getModule()->getDataLayout();
11173  switch (Intrinsic) {
11174  case Intrinsic::aarch64_sve_st2:
11175  return setInfoSVEStN<2>(*this, DL, Info, I);
11176  case Intrinsic::aarch64_sve_st3:
11177  return setInfoSVEStN<3>(*this, DL, Info, I);
11178  case Intrinsic::aarch64_sve_st4:
11179  return setInfoSVEStN<4>(*this, DL, Info, I);
11180  case Intrinsic::aarch64_neon_ld2:
11181  case Intrinsic::aarch64_neon_ld3:
11182  case Intrinsic::aarch64_neon_ld4:
11183  case Intrinsic::aarch64_neon_ld1x2:
11184  case Intrinsic::aarch64_neon_ld1x3:
11185  case Intrinsic::aarch64_neon_ld1x4:
11186  case Intrinsic::aarch64_neon_ld2lane:
11187  case Intrinsic::aarch64_neon_ld3lane:
11188  case Intrinsic::aarch64_neon_ld4lane:
11189  case Intrinsic::aarch64_neon_ld2r:
11190  case Intrinsic::aarch64_neon_ld3r:
11191  case Intrinsic::aarch64_neon_ld4r: {
11192  Info.opc = ISD::INTRINSIC_W_CHAIN;
11193  // Conservatively set memVT to the entire set of vectors loaded.
11194  uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
11195  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
11196  Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
11197  Info.offset = 0;
11198  Info.align.reset();
11199  // volatile loads with NEON intrinsics not supported
11200  Info.flags = MachineMemOperand::MOLoad;
11201  return true;
11202  }
11203  case Intrinsic::aarch64_neon_st2:
11204  case Intrinsic::aarch64_neon_st3:
11205  case Intrinsic::aarch64_neon_st4:
11206  case Intrinsic::aarch64_neon_st1x2:
11207  case Intrinsic::aarch64_neon_st1x3:
11208  case Intrinsic::aarch64_neon_st1x4:
11209  case Intrinsic::aarch64_neon_st2lane:
11210  case Intrinsic::aarch64_neon_st3lane:
11211  case Intrinsic::aarch64_neon_st4lane: {
11212  Info.opc = ISD::INTRINSIC_VOID;
11213  // Conservatively set memVT to the entire set of vectors stored.
11214  unsigned NumElts = 0;
11215  for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
11216  Type *ArgTy = I.getArgOperand(ArgI)->getType();
11217  if (!ArgTy->isVectorTy())
11218  break;
11219  NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
11220  }
11221  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
11222  Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
11223  Info.offset = 0;
11224  Info.align.reset();
11225  // volatile stores with NEON intrinsics not supported
11226  Info.flags = MachineMemOperand::MOStore;
11227  return true;
11228  }
11229  case Intrinsic::aarch64_ldaxr:
11230  case Intrinsic::aarch64_ldxr: {
11231  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
11232  Info.opc = ISD::INTRINSIC_W_CHAIN;
11233  Info.memVT = MVT::getVT(PtrTy->getElementType());
11234  Info.ptrVal = I.getArgOperand(0);
11235  Info.offset = 0;
11236  Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11237  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
11238  return true;
11239  }
11240  case Intrinsic::aarch64_stlxr:
11241  case Intrinsic::aarch64_stxr: {
11242  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
11243  Info.opc = ISD::INTRINSIC_W_CHAIN;
11244  Info.memVT = MVT::getVT(PtrTy->getElementType());
11245  Info.ptrVal = I.getArgOperand(1);
11246  Info.offset = 0;
11247  Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11248  Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
11249  return true;
11250  }
11251  case Intrinsic::aarch64_ldaxp:
11252  case Intrinsic::aarch64_ldxp:
11253  Info.opc = ISD::INTRINSIC_W_CHAIN;
11254  Info.memVT = MVT::i128;
11255  Info.ptrVal = I.getArgOperand(0);
11256  Info.offset = 0;
11257  Info.align = Align(16);
11258  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
11259  return true;
11260  case Intrinsic::aarch64_stlxp:
11261  case Intrinsic::aarch64_stxp:
11262  Info.opc = ISD::INTRINSIC_W_CHAIN;
11263  Info.memVT = MVT::i128;
11264  Info.ptrVal = I.getArgOperand(2);
11265  Info.offset = 0;
11266  Info.align = Align(16);
11267  Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
11268  return true;
11269  case Intrinsic::aarch64_sve_ldnt1: {
11270  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
11271  Info.opc = ISD::INTRINSIC_W_CHAIN;
11272  Info.memVT = MVT::getVT(I.getType());
11273  Info.ptrVal = I.getArgOperand(1);
11274  Info.offset = 0;
11275  Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11276  Info.flags = MachineMemOperand::MOLoad;
11277  if (Intrinsic == Intrinsic::aarch64_sve_ldnt1)
11278  Info.flags |= MachineMemOperand::MONonTemporal;
11279  return true;
11280  }
11281  case Intrinsic::aarch64_sve_stnt1: {
11282  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
11283  Info.opc = ISD::INTRINSIC_VOID;
11284  Info.memVT = MVT::getVT(I.getOperand(0)->getType());
11285  Info.ptrVal = I.getArgOperand(2);
11286  Info.offset = 0;
11287  Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11288  Info.flags = MachineMemOperand::MOStore;
11289  if (Intrinsic == Intrinsic::aarch64_sve_stnt1)
11290  Info.flags |= MachineMemOperand::MONonTemporal;
11291  return true;
11292  }
11293  default:
11294  break;
11295  }
11296 
11297  return false;
11298 }
11299 
11300 bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
11301  ISD::LoadExtType ExtTy,
11302  EVT NewVT) const {
11303  // TODO: This may be worth removing. Check regression tests for diffs.
11304  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
11305  return false;
11306 
11307  // If we're reducing the load width in order to avoid having to use an extra
11308  // instruction to do extension then it's probably a good idea.
11309  if (ExtTy != ISD::NON_EXTLOAD)
11310  return true;
11311  // Don't reduce load width if it would prevent us from combining a shift into
11312  // the offset.
11313  MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
11314  assert(Mem);
11315  const SDValue &Base = Mem->getBasePtr();
11316  if (Base.getOpcode() == ISD::ADD &&
11317  Base.getOperand(1).getOpcode() == ISD::SHL &&
11318  Base.getOperand(1).hasOneUse() &&
11319  Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
11320  // The shift can be combined if it matches the size of the value being
11321  // loaded (and so reducing the width would make it not match).
11322  uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
11323  uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
11324  if (ShiftAmount == Log2_32(LoadBytes))
11325  return false;
11326  }
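  // For example, for an i64 load from (add x, (shl y, 3)), the shift matches
  // the 8-byte access size and can fold into the addressing mode, so the
  // load width is kept as-is.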
11327  // We have no reason to disallow reducing the load width, so allow it.
11328  return true;
11329 }
11330 
11331 // Truncations from 64-bit GPR to 32-bit GPR are free.
11332 bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
11333  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
11334  return false;
11335  uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
11336  uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
11337  return NumBits1 > NumBits2;
11338 }
11339 bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
11340  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
11341  return false;
11342  uint64_t NumBits1 = VT1.getFixedSizeInBits();
11343  uint64_t NumBits2 = VT2.getFixedSizeInBits();
11344  return NumBits1 > NumBits2;
11345 }
11346 
11347 /// Check if it is profitable to hoist instruction in then/else to if.
11348 /// Not profitable if I and its user can form an FMA instruction
11349 /// because we prefer FMSUB/FMADD.
11350 bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
11351  if (I->getOpcode() != Instruction::FMul)
11352  return true;
11353 
11354  if (!I->hasOneUse())
11355  return true;
11356 
11357  Instruction *User = I->user_back();
11358 
11359  if (User &&
11360  !(User->getOpcode() == Instruction::FSub ||
11361  User->getOpcode() == Instruction::FAdd))
11362  return true;
11363 
11364  const TargetOptions &Options = getTargetMachine().Options;
11365  const Function *F = I->getFunction();
11366  const DataLayout &DL = F->getParent()->getDataLayout();
11367  Type *Ty = User->getOperand(0)->getType();
11368 
11369  return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
11370  isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
11371  (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11372  Options.UnsafeFPMath));
11373 }
11374 
11375 // All 32-bit GPR operations implicitly zero the high-half of the corresponding
11376 // 64-bit GPR.
11377 bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
11378  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
11379  return false;
11380  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
11381  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
11382  return NumBits1 == 32 && NumBits2 == 64;
11383 }
11384 bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
11385  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
11386  return false;
11387  unsigned NumBits1 = VT1.getSizeInBits();
11388  unsigned NumBits2 = VT2.getSizeInBits();
11389  return NumBits1 == 32 && NumBits2 == 64;
11390 }
11391 
11392 bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
11393  EVT VT1 = Val.getValueType();
11394  if (isZExtFree(VT1, VT2)) {
11395  return true;
11396  }
11397 
11398  if (Val.getOpcode() != ISD::LOAD)
11399  return false;
11400 
11401  // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
11402  return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
11403  VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
11404  VT1.getSizeInBits() <= 32);
11405 }
11406 
11407 bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
11408  if (isa<FPExtInst>(Ext))
11409  return false;
11410 
11411  // Vector types are not free.
11412  if (Ext->getType()->isVectorTy())
11413  return false;
11414 
11415  for (const Use &U : Ext->uses()) {
11416  // The extension is free if we can fold it with a left shift in an
11417  // addressing mode or an arithmetic operation: add, sub, and cmp.
11418 
11419  // Is there a shift?
11420  const Instruction *Instr = cast<Instruction>(U.getUser());
11421 
11422  // Is this a constant shift?
11423  switch (Instr->getOpcode()) {
11424  case Instruction::Shl:
11425  if (!isa<ConstantInt>(Instr->getOperand(1)))
11426  return false;
11427  break;
11428  case Instruction::GetElementPtr: {
11429  gep_type_iterator GTI = gep_type_begin(Instr);
11430  auto &DL = Ext->getModule()->getDataLayout();
11431  std::advance(GTI, U.getOperandNo()-1);
11432  Type *IdxTy = GTI.getIndexedType();
11433  // This extension will end up with a shift because of the scaling factor.
11434  // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
11435  // Get the shift amount based on the scaling factor:
11436  // log2(sizeof(IdxTy)) - log2(8).
11437  uint64_t ShiftAmt =
11438  countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3;
11439  // Is the constant foldable in the shift of the addressing mode?
11440  // I.e., shift amount is between 1 and 4 inclusive.
11441  if (ShiftAmt == 0 || ShiftAmt > 4)
11442  return false;
11443  break;
11444  }
11445  case Instruction::Trunc:
11446  // Check if this is a noop.
11447  // trunc(sext ty1 to ty2) to ty1.
11448  if (Instr->getType() == Ext->getOperand(0)->getType())
11449  continue;
11450  LLVM_FALLTHROUGH;
11451  default:
11452  return false;
11453  }
11454 
11455  // At this point we can use the bfm family, so this extension is free
11456  // for that use.
11457  }
11458  return true;
11459 }
11460 
11461 /// Check if both Op1 and Op2 are shufflevector extracts of either the lower
11462 /// or upper half of the vector elements.
11463 static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
11464  auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
11465  auto *FullTy = FullV->getType();
11466  auto *HalfTy = HalfV->getType();
11467  return FullTy->getPrimitiveSizeInBits().getFixedSize() ==
11468  2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
11469  };
11470 
11471  auto extractHalf = [](Value *FullV, Value *HalfV) {
11472  auto *FullVT = cast<FixedVectorType>(FullV->getType());
11473  auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
11474  return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
11475  };
11476 
11477  ArrayRef<int> M1, M2;
11478  Value *S1Op1, *S2Op1;
11479  if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
11480  !match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
11481  return false;
11482 
11483  // Check that each extract is half as wide, in bits and in element
11484  // count, as the source vector it extracts from.
11485  if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
11486  !extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
11487  return false;
11488 
11489  // Check the mask extracts either the lower or upper half of vector
11490  // elements.
11491  int M1Start = -1;
11492  int M2Start = -1;
11493  int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
11494  if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
11495  !ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
11496  M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
11497  return false;
11498 
11499  return true;
11500 }
11501 
11502 /// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
11503 /// of the vector elements.
11504 static bool areExtractExts(Value *Ext1, Value *Ext2) {
11505  auto areExtDoubled = [](Instruction *Ext) {
11506  return Ext->getType()->getScalarSizeInBits() ==
11507  2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
11508  };
11509 
11510  if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
11511  !match(Ext2, m_ZExtOrSExt(m_Value())) ||
11512  !areExtDoubled(cast<Instruction>(Ext1)) ||
11513  !areExtDoubled(cast<Instruction>(Ext2)))
11514  return false;
11515 
11516  return true;
11517 }
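// For example, zext <8 x i8> %a to <8 x i16> paired with sext <8 x i8> %b to
// <8 x i16> satisfies areExtractExts, since each result doubles the scalar
// width of its operand.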
11518 
11519 /// Check if Op could be used with vmull_high_p64 intrinsic.
11520 static bool isOperandOfVmullHighP64(Value *Op) {
11521  Value *VectorOperand = nullptr;
11522  ConstantInt *ElementIndex = nullptr;
11523  return match(Op, m_ExtractElt(m_Value(VectorOperand),
11524  m_ConstantInt(ElementIndex))) &&
11525  ElementIndex->getValue() == 1 &&
11526  isa<FixedVectorType>(VectorOperand->getType()) &&
11527  cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
11528 }
11529 
11530 /// Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
11531 static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
11532  return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
11533 }
11534 
11535 /// Check if sinking \p I's operands to I's basic block is profitable, because
11536 /// the operands can be folded into a target instruction, e.g.
11537 /// shufflevectors extracts and/or sext/zext can be folded into (u,s)subl(2).
11538 bool AArch64TargetLowering::shouldSinkOperands(
11539  Instruction *I, SmallVectorImpl<Use *> &Ops) const {
11540  if (!I->getType()->isVectorTy())
11541  return false;
11542 
11543  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
11544  switch (II->getIntrinsicID()) {
11545  case Intrinsic::aarch64_neon_umull:
11546  if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
11547  return false;
11548  Ops.push_back(&II->getOperandUse(0));
11549  Ops.push_back(&II->getOperandUse(1));
11550  return true;
11551 
11552  case Intrinsic::aarch64_neon_pmull64:
11553  if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
11554  II->getArgOperand(1)))
11555  return false;
11556  Ops.push_back(&II->getArgOperandUse(0));
11557  Ops.push_back(&II->getArgOperandUse(1));
11558  return true;
11559 
11560  default:
11561  return false;
11562  }
11563  }
11564 
11565  switch (I->getOpcode()) {
11566  case Instruction::Sub:
11567  case Instruction::Add: {
11568  if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
11569  return false;
11570 
11571  // If the exts' operands extract either the lower or upper elements, we
11572  // can sink them too.
11573  auto Ext1 = cast<Instruction>(I->getOperand(0));
11574  auto Ext2 = cast<Instruction>(I->getOperand(1));
11575  if (areExtractShuffleVectors(Ext1, Ext2)) {
11576  Ops.push_back(&Ext1->getOperandUse(0));
11577  Ops.push_back(&Ext2->getOperandUse(0));
11578  }
11579 
11580  Ops.push_back(&I->getOperandUse(0));
11581  Ops.push_back(&I->getOperandUse(1));
11582 
11583  return true;
11584  }
11585  case Instruction::Mul: {
11586  bool IsProfitable = false;
11587  for (auto &Op : I->operands()) {
11588  // Make sure we are not already sinking this operand
11589  if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
11590  continue;
11591 
11592  ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
11593  if (!Shuffle || !Shuffle->isZeroEltSplat())
11594  continue;
11595 
11596  Value *ShuffleOperand = Shuffle->getOperand(0);
11597  InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
11598  if (!Insert)
11599  continue;
11600 
11601  Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
11602  if (!OperandInstr)
11603  continue;
11604 
11605  ConstantInt *ElementConstant =
11606  dyn_cast<ConstantInt>(Insert->getOperand(2));
11607  // Check that the insertelement is inserting into element 0
11608  if (!ElementConstant || ElementConstant->getZExtValue() != 0)
11609  continue;
11610 
11611  unsigned Opcode = OperandInstr->getOpcode();
11612  if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
11613  continue;
11614 
11615  Ops.push_back(&Shuffle->getOperandUse(0));
11616  Ops.push_back(&Op);
11617  IsProfitable = true;
11618  }
11619 
11620  return IsProfitable;
11621  }
11622  default:
11623  return false;
11624  }
11625  return false;
11626 }
11627 
11628 bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
11629  Align &RequiredAligment) const {
11630  if (!LoadedType.isSimple() ||
11631  (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
11632  return false;
11633  // Cyclone supports unaligned accesses.
11634  RequiredAligment = Align(1);
11635  unsigned NumBits = LoadedType.getSizeInBits();
11636  return NumBits == 32 || NumBits == 64;
11637 }
11638 
11639 /// A helper function for determining the number of interleaved accesses we
11640 /// will generate when lowering accesses of the given type.
11641 unsigned
11642 AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
11643  const DataLayout &DL) const {
11644  return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
11645 }
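// For example, a 256-bit type such as <8 x i32> needs (256 + 127) / 128 = 2
// accesses, while a 64-bit <8 x i8> needs just one.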
11646 
11647 MachineMemOperand::Flags
11648 AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
11649  if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
11650  I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
11651  return MOStridedAccess;
11652  return MachineMemOperand::MONone;
11653 }
11654 
11655 bool AArch64TargetLowering::isLegalInterleavedAccessType(
11656  VectorType *VecTy, const DataLayout &DL) const {
11657 
11658  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
11659  unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
11660 
11661  // Ensure the number of vector elements is greater than 1.
11662  if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
11663  return false;
11664 
11665  // Ensure the element type is legal.
11666  if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
11667  return false;
11668 
11669  // Ensure the total vector size is 64 or a multiple of 128. Types larger than
11670  // 128 will be split into multiple interleaved accesses.
11671  return VecSize == 64 || VecSize % 128 == 0;
11672 }
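// For example, <8 x i8> (64 bits) and <16 x i32> (512 bits) are legal here,
// while <3 x i32> (96 bits) is not.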
11673 
11674 /// Lower an interleaved load into a ldN intrinsic.
11675 ///
11676 /// E.g. Lower an interleaved load (Factor = 2):
11677 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
11678 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
11679 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
11680 ///
11681 /// Into:
11682 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
11683 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
11684 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
11685 bool AArch64TargetLowering::lowerInterleavedLoad(
11686  LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
11687  ArrayRef<unsigned> Indices, unsigned Factor) const {
11688  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
11689  "Invalid interleave factor");
11690  assert(!Shuffles.empty() && "Empty shufflevector input");
11691  assert(Shuffles.size() == Indices.size() &&
11692  "Unmatched number of shufflevectors and indices");
11693 
11694  const DataLayout &DL = LI->getModule()->getDataLayout();
11695 
11696  VectorType *VTy = Shuffles[0]->getType();
11697 
11698  // Skip if we do not have NEON and skip illegal vector types. We can
11699  // "legalize" wide vector types into multiple interleaved accesses as long as
11700  // the vector types are divisible by 128.
11701  if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
11702  return false;
11703 
11704  unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
11705 
11706  auto *FVTy = cast<FixedVectorType>(VTy);
11707 
11708  // A pointer vector can not be the return type of the ldN intrinsics. Need to
11709  // load integer vectors first and then convert to pointer vectors.
11710  Type *EltTy = FVTy->getElementType();
11711  if (EltTy->isPointerTy())
11712  FVTy =
11713  FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
11714 
11715  IRBuilder<> Builder(LI);
11716 
11717  // The base address of the load.
11718  Value *BaseAddr = LI->getPointerOperand();
11719 
11720  if (NumLoads > 1) {
11721  // If we're going to generate more than one load, reset the sub-vector type
11722  // to something legal.
11723  FVTy = FixedVectorType::get(FVTy->getElementType(),
11724  FVTy->getNumElements() / NumLoads);
11725 
11726  // We will compute the pointer operand of each load from the original base
11727  // address using GEPs. Cast the base address to a pointer to the scalar
11728  // element type.
11729  BaseAddr = Builder.CreateBitCast(
11730  BaseAddr,
11731  FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
11732  }
11733 
11734  Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
11735  Type *Tys[2] = {FVTy, PtrTy};
11736  static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
11737  Intrinsic::aarch64_neon_ld3,
11738  Intrinsic::aarch64_neon_ld4};
11739  Function *LdNFunc =
11740  Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
11741 
11742  // Holds sub-vectors extracted from the load intrinsic return values. The
11743  // sub-vectors are associated with the shufflevector instructions they will
11744  // replace.
11745  DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
11746 
11747  for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
11748 
11749  // If we're generating more than one load, compute the base address of
11750  // subsequent loads as an offset from the previous.
11751  if (LoadCount > 0)
11752  BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
11753  FVTy->getNumElements() * Factor);
11754 
11755  CallInst *LdN = Builder.CreateCall(
11756  LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
11757 
11758  // Extract and store the sub-vectors returned by the load intrinsic.
11759  for (unsigned i = 0; i < Shuffles.size(); i++) {
11760  ShuffleVectorInst *SVI = Shuffles[i];
11761  unsigned Index = Indices[i];
11762 
11763  Value *SubVec = Builder.CreateExtractValue(LdN, Index);
11764 
11765  // Convert the integer vector to pointer vector if the element is pointer.
11766  if (EltTy->isPointerTy())
11767  SubVec = Builder.CreateIntToPtr(
11768  SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
11769  FVTy->getNumElements()));
11770  SubVecs[SVI].push_back(SubVec);
11771  }
11772  }
11773 
11774  // Replace uses of the shufflevector instructions with the sub-vectors
11775  // returned by the load intrinsic. If a shufflevector instruction is
11776  // associated with more than one sub-vector, those sub-vectors will be
11777  // concatenated into a single wide vector.
11778  for (ShuffleVectorInst *SVI : Shuffles) {
11779  auto &SubVec = SubVecs[SVI];
11780  auto *WideVec =
11781  SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
11782  SVI->replaceAllUsesWith(WideVec);
11783  }
11784 
11785  return true;
11786 }
11787 
11788 /// Lower an interleaved store into a stN intrinsic.
11789 ///
11790 /// E.g. Lower an interleaved store (Factor = 3):
11791 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
11792 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
11793 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
11794 ///
11795 /// Into:
11796 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
11797 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
11798 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
11799 /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
11800 ///
11801 /// Note that the new shufflevectors will be removed and we'll only generate one
11802 /// st3 instruction in CodeGen.
11803 ///
11804 /// Example for a more general valid mask (Factor 3). Lower:
11805 /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
11806 /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
11807 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
11808 ///
11809 /// Into:
11810 /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
11811 /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
11812 /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
11813 /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
11814 bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
11815  ShuffleVectorInst *SVI,
11816  unsigned Factor) const {
11817  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
11818  "Invalid interleave factor");
11819 
11820  auto *VecTy = cast<FixedVectorType>(SVI->getType());
11821  assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
11822 
11823  unsigned LaneLen = VecTy->getNumElements() / Factor;
11824  Type *EltTy = VecTy->getElementType();
11825  auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
11826 
11827  const DataLayout &DL = SI->getModule()->getDataLayout();
11828 
11829  // Skip if we do not have NEON and skip illegal vector types. We can
11830  // "legalize" wide vector types into multiple interleaved accesses as long as
11831  // the vector types are divisible by 128.
11832  if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
11833  return false;
11834 
11835  unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
11836 
11837  Value *Op0 = SVI->getOperand(0);
11838  Value *Op1 = SVI->getOperand(1);
11839  IRBuilder<> Builder(SI);
11840 
11841  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
11842  // vectors to integer vectors.
11843  if (EltTy->isPointerTy()) {
11844  Type *IntTy = DL.getIntPtrType(EltTy);
11845  unsigned NumOpElts =
11846  cast<FixedVectorType>(Op0->getType())->getNumElements();
11847 
11848  // Convert to the corresponding integer vector.
11849  auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
11850  Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
11851  Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
11852 
11853  SubVecTy = FixedVectorType::get(IntTy, LaneLen);
11854  }
11855 
11856  // The base address of the store.
11857  Value *BaseAddr = SI->getPointerOperand();
11858 
11859  if (NumStores > 1) {
11860  // If we're going to generate more than one store, reset the lane length
11861  // and sub-vector type to something legal.
11862  LaneLen /= NumStores;
11863  SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
11864 
11865  // We will compute the pointer operand of each store from the original base
11866  // address using GEPs. Cast the base address to a pointer to the scalar
11867  // element type.
11868  BaseAddr = Builder.CreateBitCast(
11869  BaseAddr,
11870  SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
11871  }
11872 
11873  auto Mask = SVI->getShuffleMask();
11874 
11875  Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
11876  Type *Tys[2] = {SubVecTy, PtrTy};
11877  static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
11878  Intrinsic::aarch64_neon_st3,
11879  Intrinsic::aarch64_neon_st4};
11880  Function *StNFunc =
11881  Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
11882 
11883  for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
11884 
11885  SmallVector<Value *, 5> Ops;
11886 
11887  // Split the shufflevector operands into sub vectors for the new stN call.
11888  for (unsigned i = 0; i < Factor; i++) {
11889  unsigned IdxI = StoreCount * LaneLen * Factor + i;
11890  if (Mask[IdxI] >= 0) {
11891  Ops.push_back(Builder.CreateShuffleVector(
11892  Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
11893  } else {
11894  unsigned StartMask = 0;
11895  for (unsigned j = 1; j < LaneLen; j++) {
11896  unsigned IdxJ = StoreCount * LaneLen * Factor + j;
11897  if (Mask[IdxJ * Factor + IdxI] >= 0) {
11898  StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
11899  break;
11900  }
11901  }
11902  // Note: Filling undef gaps with random elements is ok, since
11903  // those elements were being written anyway (with undefs).
11904  // In the case of all undefs we're defaulting to using elems from 0
11905  // Note: StartMask cannot be negative, it's checked in
11906  // isReInterleaveMask
11907  Ops.push_back(Builder.CreateShuffleVector(
11908  Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
11909  }
11910  }
11911 
11912  // If we're generating more than one store, we compute the base address of
11913  // subsequent stores as an offset from the previous.
11914  if (StoreCount > 0)
11915  BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
11916  BaseAddr, LaneLen * Factor);
11917 
11918  Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
11919  Builder.CreateCall(StNFunc, Ops);
11920  }
11921  return true;
11922 }
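// Illustrative sketch (editorial addition, not part of the original file):
// the splitting arithmetic behind the multi-store path above, assuming a
// fixed-width sub-vector whose total size is a multiple of 128 bits.
static unsigned exampleNumInterleavedAccesses(unsigned SubVecBits) {
  return SubVecBits / 128; // e.g. a 512-bit sub-vector becomes four stN calls
}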
11923 
11924 // Lower an SVE structured load intrinsic returning a tuple type to target
11925 // specific intrinsic taking the same input but returning a multi-result value
11926 // of the split tuple type.
11927 //
11928 // E.g. Lowering an LD3:
11929 //
11930 // call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32(
11931 // <vscale x 4 x i1> %pred,
11932 // <vscale x 4 x i32>* %addr)
11933 //
11934 // Output DAG:
11935 //
11936 // t0: ch = EntryToken
11937 // t2: nxv4i1,ch = CopyFromReg t0, Register:nxv4i1 %0
11938 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
11939 // t5: nxv4i32,nxv4i32,nxv4i32,ch = AArch64ISD::SVE_LD3 t0, t2, t4
11940 // t6: nxv12i32 = concat_vectors t5, t5:1, t5:2
11941 //
11942 // This is called pre-legalization to avoid widening/splitting issues with
11943 // non-power-of-2 tuple types used for LD3, such as nxv12i32.
11944 SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
11945  ArrayRef<SDValue> LoadOps,
11946  EVT VT, SelectionDAG &DAG,
11947  const SDLoc &DL) const {
11948  assert(VT.isScalableVector() && "Can only lower scalable vectors");
11949 
11950  unsigned N, Opcode;
11951  static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = {
11952  {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
11953  {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
11954  {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
11955 
11956  std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
11957  assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
11958  "invalid tuple vector type!");
11959 
11960  EVT SplitVT =
11961  EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
11962  VT.getVectorElementCount().divideCoefficientBy(N));
11963  assert(isTypeLegal(SplitVT));
11964 
11965  SmallVector<EVT, 5> VTs(N, SplitVT);
11966  VTs.push_back(MVT::Other); // Chain
11967  SDVTList NodeTys = DAG.getVTList(VTs);
11968 
11969  SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps);
11970  SmallVector<SDValue, 4> PseudoLoadOps;
11971  for (unsigned I = 0; I < N; ++I)
11972  PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I));
11973  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps);
11974 }
11975 
11976 EVT AArch64TargetLowering::getOptimalMemOpType(
11977  const MemOp &Op, const AttributeList &FuncAttributes) const {
11978  bool CanImplicitFloat =
11979  !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
11980  bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
11981  bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
11982  // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
11983  // taken one instruction to materialize the v2i64 zero and one store (with
11984  // restrictive addressing mode). Just do i64 stores.
11985  bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
11986  auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
11987  if (Op.isAligned(AlignCheck))
11988  return true;
11989  bool Fast;
11990  return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
11991  MachineMemOperand::MONone, &Fast) &&
11992  Fast;
11993  };
11994 
11995  if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
11996  AlignmentIsAcceptable(MVT::v2i64, Align(16)))
11997  return MVT::v2i64;
11998  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
11999  return MVT::f128;
12000  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
12001  return MVT::i64;
12002  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
12003  return MVT::i32;
12004  return MVT::Other;
12005 }
12006 
12007 LLT AArch64TargetLowering::getOptimalMemOpLLT(
12008  const MemOp &Op, const AttributeList &FuncAttributes) const {
12009  bool CanImplicitFloat =
12010  !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
12011  bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
12012  bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
12013  // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
12014  // taken one instruction to materialize the v2i64 zero and one store (with
12015  // restrictive addressing mode). Just do i64 stores.
12016  bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
12017  auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
12018  if (Op.isAligned(AlignCheck))
12019  return true;
12020  bool Fast;
12021  return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
12022  MachineMemOperand::MONone, &Fast) &&
12023  Fast;
12024  };
12025 
12026  if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
12027  AlignmentIsAcceptable(MVT::v2i64, Align(16)))
12028  return LLT::fixed_vector(2, 64);
12029  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
12030  return LLT::scalar(128);
12031  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
12032  return LLT::scalar(64);
12033  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
12034  return LLT::scalar(32);
12035  return LLT();
12036 }
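// Illustrative sketch (editorial addition, not from the source): the decision
// ladder shared by the two overloads above, restated over plain integers. The
// return value is the chosen access width in bits (0 stands in for MVT::Other
// or an empty LLT); the alignment checks are assumed to succeed, and the
// separate NEON/FP feature tests are collapsed into one flag.
static unsigned exampleMemOpWidthBits(bool IsMemset, uint64_t Size,
                                      bool HasSIMDAndFP) {
  bool IsSmallMemset = IsMemset && Size < 32;
  if (HasSIMDAndFP && !IsSmallMemset)
    return 128; // v2i64 for memset, f128 otherwise
  if (Size >= 8)
    return 64;
  if (Size >= 4)
    return 32;
  return 0;
}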
12037 
12038 // 12-bit optionally shifted immediates are legal for adds.
12039 bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
12040  if (Immed == std::numeric_limits<int64_t>::min()) {
12041  LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
12042  << ": avoid UB for INT64_MIN\n");
12043  return false;
12044  }
12045  // Same encoding for add/sub, just flip the sign.
12046  Immed = std::abs(Immed);
12047  bool IsLegal = ((Immed >> 12) == 0 ||
12048  ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
12049  LLVM_DEBUG(dbgs() << "Is " << Immed
12050  << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
12051  return IsLegal;
12052 }
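// Illustrative sketch (editorial addition, not from the source): the encoding
// rule checked above, as a standalone predicate over the absolute immediate.
// An AArch64 ADD/SUB immediate is a 12-bit unsigned value, optionally shifted
// left by 12 bits.
static bool exampleEncodableAddSubImm(uint64_t AbsImm) {
  if ((AbsImm >> 12) == 0)
    return true; // e.g. add x0, x1, #4095
  // Low 12 bits clear and the value fits in bits [23:12], e.g. #(4095 << 12).
  return (AbsImm & 0xfff) == 0 && (AbsImm >> 24) == 0;
}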
12053 
12054 // Integer comparisons are implemented with ADDS/SUBS, so the range of valid
12055 // immediates is the same as for an add or a sub.
12056 bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
12057  return isLegalAddImmediate(Immed);
12058 }
12059 
12060 /// isLegalAddressingMode - Return true if the addressing mode represented
12061 /// by AM is legal for this target, for a load/store of the specified type.
12062 bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
12063  const AddrMode &AM, Type *Ty,
12064  unsigned AS, Instruction *I) const {
12065  // AArch64 has five basic addressing modes:
12066  // reg
12067  // reg + 9-bit signed offset
12068  // reg + SIZE_IN_BYTES * 12-bit unsigned offset
12069  // reg1 + reg2
12070  // reg + SIZE_IN_BYTES * reg
12071 
12072  // No global is ever allowed as a base.
12073  if (AM.BaseGV)
12074  return false;
12075 
12076  // No reg+reg+imm addressing.
12077  if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
12078  return false;
12079 
12080  // FIXME: Update this method to support scalable addressing modes.
12081  if (isa<ScalableVectorType>(Ty)) {
12082  uint64_t VecElemNumBytes =
12083  DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
12084  return AM.HasBaseReg && !AM.BaseOffs &&
12085  (AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes);
12086  }
12087 
12088  // check reg + imm case:
12089  // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
12090  uint64_t NumBytes = 0;
12091  if (Ty->isSized()) {
12092  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
12093  NumBytes = NumBits / 8;
12094  if (!isPowerOf2_64(NumBits))
12095  NumBytes = 0;
12096  }
12097 
12098  if (!AM.Scale) {
12099  int64_t Offset = AM.BaseOffs;
12100 
12101  // 9-bit signed offset
12102  if (isInt<9>(Offset))
12103  return true;
12104 
12105  // 12-bit unsigned offset
12106  unsigned shift = Log2_64(NumBytes);
12107  if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
12108  // Must be a multiple of NumBytes (NumBytes is a power of 2)
12109  (Offset >> shift) << shift == Offset)
12110  return true;
12111  return false;
12112  }
12113 
12114  // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
12115 
12116  return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
12117 }
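// Illustrative sketch (editorial addition, not from the source): the reg+imm
// half of the check above, with concrete bounds. For a power-of-two access
// size of NumBytes, an offset is legal if it fits the 9-bit signed range
// (ldur/stur) or is a NumBytes-multiple within the scaled 12-bit unsigned
// range (ldr/str).
static bool exampleRegImmOffsetIsLegal(int64_t Offset, uint64_t NumBytes) {
  if (Offset >= -256 && Offset <= 255) // 9-bit signed offset
    return true;
  return NumBytes && Offset > 0 && (uint64_t)Offset % NumBytes == 0 &&
         (uint64_t)Offset / NumBytes <= 4095; // 12-bit unsigned, scaled
}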
12118 
12119 bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
12120  // Consider splitting large offset of struct or array.
12121  return true;
12122 }
12123 
12124 InstructionCost AArch64TargetLowering::getScalingFactorCost(
12125  const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const {
12126  // Scaling factors are not free at all.
12127  // Operands | Rt Latency
12128  // -------------------------------------------
12129  // Rt, [Xn, Xm] | 4
12130  // -------------------------------------------
12131  // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
12132  // Rt, [Xn, Wm, <extend> #imm] |
12133  if (isLegalAddressingMode(DL, AM, Ty, AS))
12134  // Scale represents reg2 * scale, thus account for 1 if
12135  // it is not equal to 0 or 1.
12136  return AM.Scale != 0 && AM.Scale != 1;
12137  return -1;
12138 }
12139 
12140 bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
12141  const MachineFunction &MF, EVT VT) const {
12142  VT = VT.getScalarType();
12143 
12144  if (!VT.isSimple())
12145  return false;
12146 
12147  switch (VT.getSimpleVT().SimpleTy) {
12148  case MVT::f16:
12149  return Subtarget->hasFullFP16();
12150  case MVT::f32:
12151  case MVT::f64:
12152  return true;
12153  default:
12154  break;
12155  }
12156 
12157  return false;
12158 }
12159 
12160 bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
12161  Type *Ty) const {
12162  switch (Ty->getScalarType()->getTypeID()) {
12163  case Type::FloatTyID:
12164  case Type::DoubleTyID:
12165  return true;
12166  default:
12167  return false;
12168  }
12169 }
12170 
12171 bool AArch64TargetLowering::generateFMAsInMachineCombiner(
12172  EVT VT, CodeGenOpt::Level OptLevel) const {
12173  return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector();
12174 }
12175 
12176 const MCPhysReg *
12177 AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
12178  // LR is a callee-save register, but we must treat it as clobbered by any call
12179  // site. Hence we include LR in the scratch registers, which are in turn added
12180  // as implicit-defs for stackmaps and patchpoints.
12181  static const MCPhysReg ScratchRegs[] = {
12182  AArch64::X16, AArch64::X17, AArch64::LR, 0
12183  };
12184  return ScratchRegs;
12185 }
12186 
12187 bool
12188 AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
12189  CombineLevel Level) const {
12190  N = N->getOperand(0).getNode();
12191  EVT VT = N->getValueType(0);
12192  // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
12193  // it with shift to let it be lowered to UBFX.
12194  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
12195  isa<ConstantSDNode>(N->getOperand(1))) {
12196  uint64_t TruncMask = N->getConstantOperandVal(1);
12197  if (isMask_64(TruncMask) &&
12198  N->getOperand(0).getOpcode() == ISD::SRL &&
12199  isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
12200  return false;
12201  }
12202  return true;
12203 }
12204 
12205 bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
12206  Type *Ty) const {
12207  assert(Ty->isIntegerTy());
12208 
12209  unsigned BitSize = Ty->getPrimitiveSizeInBits();
12210  if (BitSize == 0)
12211  return false;
12212 
12213  int64_t Val = Imm.getSExtValue();
12214  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
12215  return true;
12216 
12217  if ((int64_t)Val < 0)
12218  Val = ~Val;
12219  if (BitSize == 32)
12220  Val &= (1LL << 32) - 1;
12221 
12222  unsigned LZ = countLeadingZeros((uint64_t)Val);
12223  unsigned Shift = (63 - LZ) / 16;
12224  // MOVZ is free so return true for one or fewer MOVK.
12225  return Shift < 3;
12226 }
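// Illustrative sketch (editorial addition, not from the source): the quantity
// the Shift computation above derives. It is the index of the highest 16-bit
// chunk containing a set bit; the function accepts the constant when this
// index is below 3, i.e. a MOVZ plus at most two MOVKs can materialize it.
// Assumes Val != 0 and already sign-normalized as above.
static unsigned exampleHighestHalfwordIndex(uint64_t Val) {
  unsigned LZ = 0;
  while (!(Val & (1ull << 63))) { // countLeadingZeros by hand
    Val <<= 1;
    ++LZ;
  }
  return (63 - LZ) / 16;
}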
12227 
12228 bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
12229  unsigned Index) const {
12230  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
12231  return false;
12232 
12233  return (Index == 0 || Index == ResVT.getVectorNumElements());
12234 }
12235 
12236 /// Turn vector tests of the signbit in the form of:
12237 /// xor (sra X, elt_size(X)-1), -1
12238 /// into:
12239 /// cmge X, X, #0
12240 static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
12241  const AArch64Subtarget *Subtarget) {
12242  EVT VT = N->getValueType(0);
12243  if (!Subtarget->hasNEON() || !VT.isVector())
12244  return SDValue();
12245 
12246  // There must be a shift right algebraic before the xor, and the xor must be a
12247  // 'not' operation.
12248  SDValue Shift = N->getOperand(0);
12249  SDValue Ones = N->getOperand(1);
12250  if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
12251  !ISD::isBuildVectorAllOnes(Ones.getNode()))
12252  return SDValue();
12253 
12254  // The shift should be smearing the sign bit across each vector element.
12255  auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
12256  EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
12257  if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
12258  return SDValue();
12259 
12260  return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
12261 }
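// Illustrative sketch (editorial addition, not from the source): the per-lane
// identity behind the fold above, assuming arithmetic right shift of negative
// values as on AArch64. Inverting the smeared sign bit yields all-ones exactly
// for non-negative inputs, which is what CMGE x, x, #0 produces.
static bool exampleXorOfSignSmearIsCmgez(int32_t X) {
  int32_t NotSmeared = (X >> 31) ^ -1;   // xor(sra(X, 31), -1)
  int32_t CmgezLane = (X >= 0) ? -1 : 0; // lane result of cmge #0
  return NotSmeared == CmgezLane;        // holds for all X
}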
12262 
12263 // Given a vecreduce_add node, detect the below pattern and convert it to the
12264 // node sequence with UABDL, [S|U]ABD and UADDLP.
12265 //
12266 // i32 vecreduce_add(
12267 // v16i32 abs(
12268 // v16i32 sub(
12269 // v16i32 [sign|zero]_extend(v16i8 a), v16i32 [sign|zero]_extend(v16i8 b))))
12270 // =================>
12271 // i32 vecreduce_add(
12272 // v4i32 UADDLP(
12273 // v8i16 add(
12274 // v8i16 zext(
12275 // v8i8 [S|U]ABD low8:v16i8 a, low8:v16i8 b
12276 // v8i16 zext(
12277 // v8i8 [S|U]ABD high8:v16i8 a, high8:v16i8 b
12278 static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
12279  SelectionDAG &DAG) {
12280  // Assumed i32 vecreduce_add
12281  if (N->getValueType(0) != MVT::i32)
12282  return SDValue();
12283 
12284  SDValue VecReduceOp0 = N->getOperand(0);
12285  unsigned Opcode = VecReduceOp0.getOpcode();
12286  // Assumed v16i32 abs
12287  if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
12288  return SDValue();
12289 
12290  SDValue ABS = VecReduceOp0;
12291  // Assumed v16i32 sub
12292  if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
12293  ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
12294  return SDValue();
12295 
12296  SDValue SUB = ABS->getOperand(0);
12297  unsigned Opcode0 = SUB->getOperand(0).getOpcode();
12298  unsigned Opcode1 = SUB->getOperand(1).getOpcode();
12299  // Assumed v16i32 type
12300  if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
12301  SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
12302  return SDValue();
12303 
12304  // Assumed zext or sext
12305  bool IsZExt = false;
12306  if (Opcode0 == ISD::ZERO_EXTEND && Opcode1 == ISD::ZERO_EXTEND) {
12307  IsZExt = true;
12308  } else if (Opcode0 == ISD::SIGN_EXTEND && Opcode1 == ISD::SIGN_EXTEND) {
12309  IsZExt = false;
12310  } else
12311  return SDValue();
12312 
12313  SDValue EXT0 = SUB->getOperand(0);
12314  SDValue EXT1 = SUB->getOperand(1);
12315  // Assumed zext's operand has v16i8 type
12316  if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
12317  EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
12318  return SDValue();
12319 
12320  // Pattern is detected. Let's convert it to a sequence of nodes.
12321  SDLoc DL(N);
12322 
12323  // First, create the node pattern of UABD/SABD.
12324  SDValue UABDHigh8Op0 =
12325  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
12326  DAG.getConstant(8, DL, MVT::i64));
12327  SDValue UABDHigh8Op1 =
12328  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
12329  DAG.getConstant(8, DL, MVT::i64));
12330  SDValue UABDHigh8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
12331  UABDHigh8Op0, UABDHigh8Op1);
12332  SDValue UABDL = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDHigh8);
12333 
12334  // Second, create the node pattern of UABAL.
12335  SDValue UABDLo8Op0 =
12336  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
12337  DAG.getConstant(0, DL, MVT::i64));
12338  SDValue UABDLo8Op1 =
12339  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
12340  DAG.getConstant(0, DL, MVT::i64));
12341  SDValue UABDLo8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
12342  UABDLo8Op0, UABDLo8Op1);
12343  SDValue ZExtUABD = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDLo8);
12344  SDValue UABAL = DAG.getNode(ISD::ADD, DL, MVT::v8i16, UABDL, ZExtUABD);
12345 
12346  // Third, create the node of UADDLP.
12347  SDValue UADDLP = DAG.getNode(AArch64ISD::UADDLP, DL, MVT::v4i32, UABAL);
12348
12349  // Fourth, create the node of VECREDUCE_ADD.
12350  return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP);
12351 }
12352 
12353 // Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce
12354 // vecreduce.add(ext(A)) to vecreduce.add(DOT(zero, A, one))
12355 // vecreduce.add(mul(ext(A), ext(B))) to vecreduce.add(DOT(zero, A, B))
12356 static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
12357  const AArch64Subtarget *ST) {
12358  if (!ST->hasDotProd())
12359  return performVecReduceAddCombineWithUADDLP(N, DAG);
12360
12361  SDValue Op0 = N->getOperand(0);
12362  if (N->getValueType(0) != MVT::i32 ||
12363  Op0.getValueType().getVectorElementType() != MVT::i32)
12364  return SDValue();
12365 
12366  unsigned ExtOpcode = Op0.getOpcode();
12367  SDValue A = Op0;
12368  SDValue B;
12369  if (ExtOpcode == ISD::MUL) {
12370  A = Op0.getOperand(0);
12371  B = Op0.getOperand(1);
12372  if (A.getOpcode() != B.getOpcode() ||
12373  A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
12374  return SDValue();
12375  ExtOpcode = A.getOpcode();
12376  }
12377  if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
12378  return SDValue();
12379 
12380  EVT Op0VT = A.getOperand(0).getValueType();
12381  if (Op0VT != MVT::v8i8 && Op0VT != MVT::v16i8)
12382  return SDValue();
12383 
12384  SDLoc DL(Op0);
12385  // For non-mla reductions B can be set to 1. For MLA we take the operand of
12386  // the extend B.
12387  if (!B)
12388  B = DAG.getConstant(1, DL, Op0VT);
12389  else
12390  B = B.getOperand(0);
12391 
12392  SDValue Zeros =
12393  DAG.getConstant(0, DL, Op0VT == MVT::v8i8 ? MVT::v2i32 : MVT::v4i32);
12394  auto DotOpcode =
12395  (ExtOpcode == ISD::ZERO_EXTEND) ? AArch64ISD::UDOT : AArch64ISD::SDOT;
12396  SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
12397  A.getOperand(0), B);
12398  return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
12399 }
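// Illustrative sketch (editorial addition, not from the source): scalar
// reference for the unsigned, non-mla case of the rewrite above. UDOT against
// an all-ones B accumulates the zero-extended bytes into i32 lanes, so
// summing the extended bytes directly computes the same value the DOT-based
// DAG produces (the signed case is analogous with SDOT).
static uint32_t exampleVecReduceAddZext(const uint8_t *A, unsigned NumElts) {
  uint32_t Sum = 0;
  for (unsigned I = 0; I < NumElts; ++I)
    Sum += A[I]; // zext(A[I]) * 1, accumulated
  return Sum;
}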
12400 
12401 static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
12402  TargetLowering::DAGCombinerInfo &DCI,
12403  const AArch64Subtarget *Subtarget) {
12404  if (DCI.isBeforeLegalizeOps())
12405  return SDValue();
12406 
12407  return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
12408 }
12409 
12410 SDValue
12411 AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
12412  SelectionDAG &DAG,
12413  SmallVectorImpl<SDNode *> &Created) const {
12414  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12415  if (isIntDivCheap(N->getValueType(0), Attr))
12416  return SDValue(N,0); // Lower SDIV as SDIV
12417 
12418  // fold (sdiv X, pow2)
12419  EVT VT = N->getValueType(0);
12420  if ((VT != MVT::i32 && VT != MVT::i64) ||
12421  !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
12422  return SDValue();
12423 
12424  SDLoc DL(N);
12425  SDValue N0 = N->getOperand(0);
12426  unsigned Lg2 = Divisor.countTrailingZeros();
12427  SDValue Zero = DAG.getConstant(0, DL, VT);
12428  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
12429 
12430  // Add (N0 < 0) ? Pow2 - 1 : 0;
12431  SDValue CCVal;
12432  SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
12433  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
12434  SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
12435 
12436  Created.push_back(Cmp.getNode());
12437  Created.push_back(Add.getNode());
12438  Created.push_back(CSel.getNode());
12439 
12440  // Divide by pow2.
12441  SDValue SRA =
12442  DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
12443 
12444  // If we're dividing by a positive value, we're done. Otherwise, we must
12445  // negate the result.
12446  if (Divisor.isNonNegative())
12447  return SRA;
12448 
12449  Created.push_back(SRA.getNode());
12450  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
12451 }
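// Illustrative sketch (editorial addition, not from the source): the scalar
// computation built by the CMP/CSEL/SRA sequence above. A signed divide by
// 2^Lg2 that rounds toward zero must bias negative dividends by 2^Lg2 - 1
// before the arithmetic shift.
static int64_t exampleSdivByPow2(int64_t X, unsigned Lg2) {
  int64_t Bias = (1ll << Lg2) - 1;
  int64_t Biased = X < 0 ? X + Bias : X; // the CSEL picks Add or N0
  return Biased >> Lg2;                  // the SRA
}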
12452 
12453 static bool IsSVECntIntrinsic(SDValue S) {
12454  switch(getIntrinsicID(S.getNode())) {
12455  default:
12456  break;
12457  case Intrinsic::aarch64_sve_cntb:
12458  case Intrinsic::aarch64_sve_cnth:
12459  case Intrinsic::aarch64_sve_cntw:
12460  case Intrinsic::aarch64_sve_cntd:
12461  return true;
12462  }
12463  return false;
12464 }
12465 
12466 /// Calculates what the pre-extend type is, based on the extension
12467 /// operation node provided by \p Extend.
12468 ///
12469 /// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
12470 /// pre-extend type is pulled directly from the operand, while other extend
12471 /// operations need a bit more inspection to get this information.
12472 ///
12473 /// \param Extend The SDNode from the DAG that represents the extend operation
12474 /// \param DAG The SelectionDAG hosting the \p Extend node
12475 ///
12476 /// \returns The type representing the \p Extend source type, or \p MVT::Other
12477 /// if no valid type can be determined
12478 static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
12479  switch (Extend.getOpcode()) {
12480  case ISD::SIGN_EXTEND:
12481  case ISD::ZERO_EXTEND:
12482  return Extend.getOperand(0).getValueType();
12483  case ISD::AssertSext:
12484  case ISD::AssertZext:
12485  case ISD::SIGN_EXTEND_INREG: {
12486  VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
12487  if (!TypeNode)
12488  return MVT::Other;
12489  return TypeNode->getVT();
12490  }
12491  case ISD::AND: {
12492  ConstantSDNode *Constant =
12493  dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
12494  if (!Constant)
12495  return MVT::Other;
12496 
12497  uint32_t Mask = Constant->getZExtValue();
12498 
12499  if (Mask == UCHAR_MAX)
12500  return MVT::i8;
12501  else if (Mask == USHRT_MAX)
12502  return MVT::i16;
12503  else if (Mask == UINT_MAX)
12504  return MVT::i32;
12505 
12506  return MVT::Other;
12507  }
12508  default:
12509  return MVT::Other;
12510  }
12511 
12512  llvm_unreachable("Code path unhandled in calculatePreExtendType!");
12513 }
12514 
12515 /// Combines a dup(sext/zext) node pattern into sext/zext(dup)
12516 /// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
12517 static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
12518  SelectionDAG &DAG) {
12519 
12520  ShuffleVectorSDNode *ShuffleNode =
12521  dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
12522  if (!ShuffleNode)
12523  return SDValue();
12524 
12525  // Ensure the shuffle is a splat of lane 0 before continuing
12526  if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
12527  return SDValue();
12528 
12529  SDValue InsertVectorElt = VectorShuffle.getOperand(0);
12530 
12531  if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
12532  return SDValue();
12533 
12534  SDValue InsertLane = InsertVectorElt.getOperand(2);
12535  ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
12536  // Ensures the insert is inserting into lane 0
12537  if (!Constant || Constant->getZExtValue() != 0)
12538  return SDValue();
12539 
12540  SDValue Extend = InsertVectorElt.getOperand(1);
12541  unsigned ExtendOpcode = Extend.getOpcode();
12542 
12543  bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
12544  ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
12545  ExtendOpcode == ISD::AssertSext;
12546  if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
12547  ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
12548  return SDValue();
12549 
12550  EVT TargetType = VectorShuffle.getValueType();
12551  EVT PreExtendType = calculatePreExtendType(Extend, DAG);
12552 
12553  if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
12554  TargetType != MVT::v2i64) ||
12555  (PreExtendType == MVT::Other))
12556  return SDValue();
12557 
12558  // Restrict valid pre-extend data type
12559  if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
12560  PreExtendType != MVT::i32)
12561  return SDValue();
12562 
12563  EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
12564 
12565  if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
12566  return SDValue();
12567 
12568  if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
12569  return SDValue();
12570 
12571  SDLoc DL(VectorShuffle);
12572 
12573  SDValue InsertVectorNode = DAG.getNode(
12574  InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
12575  DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
12576  DAG.getConstant(0, DL, MVT::i64));
12577 
12578  std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
12579 
12580  SDValue VectorShuffleNode =
12581  DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
12582  DAG.getUNDEF(PreExtendVT), ShuffleMask);
12583 
12584  SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
12585  DL, TargetType, VectorShuffleNode);
12586 
12587  return ExtendNode;
12588 }
12589 
12590 /// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
12591 /// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
12592 static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
12593  // If the value type isn't a vector, none of the operands are going to be dups
12594  if (!Mul->getValueType(0).isVector())
12595  return SDValue();
12596 
12597  SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
12598  SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
12599
12600  // Neither operands have been changed, don't make any further changes
12601  if (!Op0 && !Op1)
12602  return SDValue();
12603 
12604  SDLoc DL(Mul);
12605  return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
12606  Op0 ? Op0 : Mul->getOperand(0),
12607  Op1 ? Op1 : Mul->getOperand(1));
12608 }
12609 
12610 static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
12611  TargetLowering::DAGCombinerInfo &DCI,
12612  const AArch64Subtarget *Subtarget) {
12613 
12614  if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
12615  return Ext;
12616 
12617  if (DCI.isBeforeLegalizeOps())
12618  return SDValue();
12619 
12620  // The below optimizations require a constant RHS.
12621  if (!isa<ConstantSDNode>(N->getOperand(1)))
12622  return SDValue();
12623 
12624  SDValue N0 = N->getOperand(0);
12625  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
12626  const APInt &ConstValue = C->getAPIntValue();
12627 
12628  // Allow the scaling to be folded into the `cnt` instruction by preventing
12629  // the scaling from being obscured here. This makes it easier to pattern match.
12630  if (IsSVECntIntrinsic(N0) ||
12631  (N0->getOpcode() == ISD::TRUNCATE &&
12632  (IsSVECntIntrinsic(N0->getOperand(0)))))
12633  if (ConstValue.sge(1) && ConstValue.sle(16))
12634  return SDValue();
12635 
12636  // Multiplication of a power of two plus/minus one can be done more
12637  // cheaply as as shift+add/sub. For now, this is true unilaterally. If
12638  // future CPUs have a cheaper MADD instruction, this may need to be
12639  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
12640  // 64-bit is 5 cycles, so this is always a win.
12641  // More aggressively, some multiplications N0 * C can be lowered to
12642  // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
12643  // e.g. 6=3*2=(2+1)*2.
12644  // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
12645  // which equals (1+2)*16-(1+2).
12646 
12647  // TrailingZeroes is used to test if the mul can be lowered to
12648  // shift+add+shift.
12649  unsigned TrailingZeroes = ConstValue.countTrailingZeros();
12650  if (TrailingZeroes) {
12651  // Conservatively do not lower to shift+add+shift if the mul might be
12652  // folded into smul or umul.
12653  if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
12654  isZeroExtended(N0.getNode(), DAG)))
12655  return SDValue();
12656  // Conservatively do not lower to shift+add+shift if the mul might be
12657  // folded into madd or msub.
12658  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
12659  N->use_begin()->getOpcode() == ISD::SUB))
12660  return SDValue();
12661  }
12662  // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
12663  // and shift+add+shift.
12664  APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
12665 
12666  unsigned ShiftAmt, AddSubOpc;
12667  // Is the shifted value the LHS operand of the add/sub?
12668  bool ShiftValUseIsN0 = true;
12669  // Do we need to negate the result?
12670  bool NegateResult = false;
12671 
12672  if (ConstValue.isNonNegative()) {
12673  // (mul x, 2^N + 1) => (add (shl x, N), x)
12674  // (mul x, 2^N - 1) => (sub (shl x, N), x)
12675  // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
12676  APInt SCVMinus1 = ShiftedConstValue - 1;
12677  APInt CVPlus1 = ConstValue + 1;
12678  if (SCVMinus1.isPowerOf2()) {
12679  ShiftAmt = SCVMinus1.logBase2();
12680  AddSubOpc = ISD::ADD;
12681  } else if (CVPlus1.isPowerOf2()) {
12682  ShiftAmt = CVPlus1.logBase2();
12683  AddSubOpc = ISD::SUB;
12684  } else
12685  return SDValue();
12686  } else {
12687  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
12688  // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
12689  APInt CVNegPlus1 = -ConstValue + 1;
12690  APInt CVNegMinus1 = -ConstValue - 1;
12691  if (CVNegPlus1.isPowerOf2()) {
12692  ShiftAmt = CVNegPlus1.logBase2();
12693  AddSubOpc = ISD::SUB;
12694  ShiftValUseIsN0 = false;
12695  } else if (CVNegMinus1.isPowerOf2()) {
12696  ShiftAmt = CVNegMinus1.logBase2();
12697  AddSubOpc = ISD::ADD;
12698  NegateResult = true;
12699  } else
12700  return SDValue();
12701  }
12702 
12703  SDLoc DL(N);
12704  EVT VT = N->getValueType(0);
12705  SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
12706  DAG.getConstant(ShiftAmt, DL, MVT::i64));
12707 
12708  SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
12709  SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
12710  SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
12711  assert(!(NegateResult && TrailingZeroes) &&
12712  "NegateResult and TrailingZeroes cannot both be true for now.");
12713  // Negate the result.
12714  if (NegateResult)
12715  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
12716  // Shift the result.
12717  if (TrailingZeroes)
12718  return DAG.getNode(ISD::SHL, DL, VT, Res,
12719  DAG.getConstant(TrailingZeroes, DL, MVT::i64));
12720  return Res;
12721 }
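// Illustrative sketch (editorial addition, not from the source): the constant
// decompositions the combine above recognizes, evaluated directly. For
// C = (2^N +/- 1) * 2^M the product becomes a shift, an add or sub, and an
// optional final shift; e.g. C == 6 == (2^1 + 1) * 2^1 yields
// shl(add(shl(x, 1), x), 1).
static int64_t exampleMulByDecomposedConst(int64_t X, unsigned N, bool Plus,
                                           unsigned M) {
  int64_t Shifted = X << N;                       // shl x, N
  int64_t Sum = Plus ? Shifted + X : Shifted - X; // add/sub
  return Sum << M;                                // shl result, M
}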
12722 
12723 static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
12724  SelectionDAG &DAG) {
12725  // Take advantage of vector comparisons producing 0 or -1 in each lane to
12726  // optimize away operation when it's from a constant.
12727  //
12728  // The general transformation is:
12729  // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
12730  // AND(VECTOR_CMP(x,y), constant2)
12731  // constant2 = UNARYOP(constant)
12732 
12733  // Early exit if this isn't a vector operation, the operand of the
12734  // unary operation isn't a bitwise AND, or if the sizes of the operations
12735  // aren't the same.
12736  EVT VT = N->getValueType(0);
12737  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
12738  N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
12739  VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
12740  return SDValue();
12741 
12742  // Now check that the other operand of the AND is a constant. We could
12743  // make the transformation for non-constant splats as well, but it's unclear
12744  // that would be a benefit as it would not eliminate any operations, just
12745  // perform one more step in scalar code before moving to the vector unit.
12746  if (BuildVectorSDNode *BV =
12747  dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
12748  // Bail out if the vector isn't a constant.
12749  if (!BV->isConstant())
12750  return SDValue();
12751 
12752  // Everything checks out. Build up the new and improved node.
12753  SDLoc DL(N);
12754  EVT IntVT = BV->getValueType(0);
12755  // Create a new constant of the appropriate type for the transformed
12756  // DAG.
12757  SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
12758  // The AND node needs bitcasts to/from an integer vector type around it.
12759  SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
12760  SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
12761  N->getOperand(0)->getOperand(0), MaskConst);
12762  SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
12763  return Res;
12764  }
12765 
12766  return SDValue();
12767 }
12768 
12769 static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
12770  const AArch64Subtarget *Subtarget) {
12771  // First try to optimize away the conversion when it's conditionally from
12772  // a constant. Vectors only.
12773  if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
12774  return Res;
12775 
12776  EVT VT = N->getValueType(0);
12777  if (VT != MVT::f32 && VT != MVT::f64)
12778  return SDValue();
12779 
12780  // Only optimize when the source and destination types have the same width.
12781  if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
12782  return SDValue();
12783 
12784  // If the result of an integer load is only used by an integer-to-float
12785  // conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
12786  // This eliminates an "integer-to-vector-move" UOP and improves throughput.
12787  SDValue N0 = N->getOperand(0);
12788  if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12789  // Do not change the width of a volatile load.
12790  !cast<LoadSDNode>(N0)->isVolatile()) {
12791  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12792  SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12793  LN0->getPointerInfo(), LN0->getAlignment(),
12794  LN0->getMemOperand()->getFlags());
12795 
12796  // Make sure successors of the original load stay after it by updating them
12797  // to use the new Chain.
12798  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
12799 
12800  unsigned Opcode =
12801  (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
12802  return DAG.getNode(Opcode, SDLoc(N), VT, Load);
12803  }
12804 
12805  return SDValue();
12806 }
12807 
12808 /// Fold a floating-point multiply by power of two into floating-point to
12809 /// fixed-point conversion.
12810 static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
12811  TargetLowering::DAGCombinerInfo &DCI,
12812  const AArch64Subtarget *Subtarget) {
12813  if (!Subtarget->hasNEON())
12814  return SDValue();
12815 
12816  if (!N->getValueType(0).isSimple())
12817  return SDValue();
12818 
12819  SDValue Op = N->getOperand(0);
12820  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
12821  Op.getOpcode() != ISD::FMUL)
12822  return SDValue();
12823 
12824  SDValue ConstVec = Op->getOperand(1);
12825  if (!isa<BuildVectorSDNode>(ConstVec))
12826  return SDValue();
12827 
12828  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
12829  uint32_t FloatBits = FloatTy.getSizeInBits();
12830  if (FloatBits != 32 && FloatBits != 64)
12831  return SDValue();
12832 
12833  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
12834  uint32_t IntBits = IntTy.getSizeInBits();
12835  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
12836  return SDValue();
12837 
12838  // Avoid conversions where iN is larger than the float (e.g., float -> i64).
12839  if (IntBits > FloatBits)
12840  return SDValue();
12841 
12842  BitVector UndefElements;
12843  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
12844  int32_t Bits = IntBits == 64 ? 64 : 32;
12845  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
12846  if (C == -1 || C == 0 || C > Bits)
12847  return SDValue();
12848 
12849  MVT ResTy;
12850  unsigned NumLanes = Op.getValueType().getVectorNumElements();
12851  switch (NumLanes) {
12852  default:
12853  return SDValue();
12854  case 2:
12855  ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
12856  break;
12857  case 4:
12858  ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
12859  break;
12860  }
12861 
12862  if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
12863  return SDValue();
12864 
12865  assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
12866  "Illegal vector type after legalization");
12867 
12868  SDLoc DL(N);
12869  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
12870  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
12871  : Intrinsic::aarch64_neon_vcvtfp2fxu;
12872  SDValue FixConv =
12873  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
12874  DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
12875  Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
12876  // We can handle smaller integers by generating an extra trunc.
12877  if (IntBits < FloatBits)
12878  FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
12879 
12880  return FixConv;
12881 }
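// Illustrative sketch (editorial addition, not from the source): the scalar
// equivalence the fold above relies on, assuming the scaled value is in range
// and FracBits < 32. Truncating f * 2^FracBits to an integer is exactly a
// float-to-fixed-point conversion, which FCVTZS/FCVTZU with an #fbits operand
// performs directly.
static int32_t exampleFloatToFixed(float F, unsigned FracBits) {
  return (int32_t)(F * (float)(1u << FracBits)); // fcvtzs w0, s0, #FracBits
}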
12882 
12883 /// Fold a floating-point divide by power of two into fixed-point to
12884 /// floating-point conversion.
12885 static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
12886  TargetLowering::DAGCombinerInfo &DCI,
12887  const AArch64Subtarget *Subtarget) {
12888  if (!Subtarget->hasNEON())
12889  return SDValue();
12890 
12891  SDValue Op = N->getOperand(0);
12892  unsigned Opc = Op->getOpcode();
12893  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
12894  !Op.getOperand(0).getValueType().isSimple() ||
12895  (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
12896  return SDValue();
12897 
12898  SDValue ConstVec = N->getOperand(1);
12899  if (!isa<BuildVectorSDNode>(ConstVec))
12900  return SDValue();
12901 
12902  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
12903  int32_t IntBits = IntTy.getSizeInBits();
12904  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
12905  return SDValue();
12906 
12907  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
12908  int32_t FloatBits = FloatTy.getSizeInBits();
12909  if (FloatBits != 32 && FloatBits != 64)
12910  return SDValue();
12911 
12912  // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
12913  if (IntBits > FloatBits)
12914  return SDValue();
12915 
12916  BitVector UndefElements;
12917  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
12918  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
12919  if (C == -1 || C == 0 || C > FloatBits)
12920  return SDValue();
12921 
12922  MVT ResTy;
12923  unsigned NumLanes = Op.getValueType().getVectorNumElements();
12924  switch (NumLanes) {
12925  default:
12926  return SDValue();
12927  case 2:
12928  ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
12929  break;
12930  case 4:
12931  ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
12932  break;
12933  }
12934 
12935  if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
12936  return SDValue();
12937 
12938  SDLoc DL(N);
12939  SDValue ConvInput = Op.getOperand(0);
12940  bool IsSigned = Opc == ISD::SINT_TO_FP;
12941  if (IntBits < FloatBits)
12942  ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
12943  ResTy, ConvInput);
12944 
12945  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
12946  : Intrinsic::aarch64_neon_vcvtfxu2fp;
12947  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
12948  DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
12949  DAG.getConstant(C, DL, MVT::i32));
12950 }
12951 
12952 /// An EXTR instruction is made up of two shifts, ORed together. This helper
12953 /// searches for and classifies those shifts.
12954 static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
12955  bool &FromHi) {
12956  if (N.getOpcode() == ISD::SHL)
12957  FromHi = false;
12958  else if (N.getOpcode() == ISD::SRL)
12959  FromHi = true;
12960  else
12961  return false;
12962 
12963  if (!isa<ConstantSDNode>(N.getOperand(1)))
12964  return false;
12965 
12966  ShiftAmount = N->getConstantOperandVal(1);
12967  Src = N->getOperand(0);
12968  return true;
12969 }
12970 
12971 /// EXTR instruction extracts a contiguous chunk of bits from two existing
12972 /// registers viewed as a high/low pair. This function looks for the pattern:
12973 /// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
12974 /// with an EXTR. Can't quite be done in TableGen because the two immediates
12975 /// aren't independent.
12976 static SDValue tryCombineToEXTR(SDNode *N,
12977  TargetLowering::DAGCombinerInfo &DCI) {
12978  SelectionDAG &DAG = DCI.DAG;
12979  SDLoc DL(N);
12980  EVT VT = N->getValueType(0);
12981 
12982  assert(N->getOpcode() == ISD::OR && "Unexpected root");
12983 
12984  if (VT != MVT::i32 && VT != MVT::i64)
12985  return SDValue();
12986 
12987  SDValue LHS;
12988  uint32_t ShiftLHS = 0;
12989  bool LHSFromHi = false;
12990  if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
12991  return SDValue();
12992 
12993  SDValue RHS;
12994  uint32_t ShiftRHS = 0;
12995  bool RHSFromHi = false;
12996  if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
12997  return SDValue();
12998 
12999  // If they're both trying to come from the high part of the register, they're
13000  // not really an EXTR.
13001  if (LHSFromHi == RHSFromHi)
13002  return SDValue();
13003 
13004  if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
13005  return SDValue();
13006 
13007  if (LHSFromHi) {
13008  std::swap(LHS, RHS);
13009  std::swap(ShiftLHS, ShiftRHS);
13010  }
13011 
13012  return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
13013  DAG.getConstant(ShiftRHS, DL, MVT::i64));
13014 }
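// Illustrative sketch (editorial addition, not from the source): the
// semantics of the EXTR formed above, for 64-bit registers and 0 < Lsb < 64.
// The OR of the two shifts extracts a 64-bit window starting at bit Lsb of
// the Hi:Lo concatenation.
static uint64_t exampleExtr64(uint64_t Hi, uint64_t Lo, unsigned Lsb) {
  // Matches (or (shl Hi, 64 - Lsb), (srl Lo, Lsb)).
  return (Hi << (64 - Lsb)) | (Lo >> Lsb);
}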
13015 
13016 static SDValue tryCombineToBSL(SDNode *N,
13017  TargetLowering::DAGCombinerInfo &DCI) {
13018  EVT VT = N->getValueType(0);
13019  SelectionDAG &DAG = DCI.DAG;
13020  SDLoc DL(N);
13021 
13022  if (!VT.isVector())
13023  return SDValue();
13024 
13025  // The combining code currently only works for NEON vectors. In particular,
13026  // it does not work for SVE when dealing with vectors wider than 128 bits.
13027  if (!VT.is64BitVector() && !VT.is128BitVector())
13028  return SDValue();
13029 
13030  SDValue N0 = N->getOperand(0);
13031  if (N0.getOpcode() != ISD::AND)
13032  return SDValue();
13033 
13034  SDValue N1 = N->getOperand(1);
13035  if (N1.getOpcode() != ISD::AND)
13036  return SDValue();
13037 
13038  // InstCombine does (not (neg a)) => (add a -1).
13039  // Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
13040  // Loop over all combinations of AND operands.
13041  for (int i = 1; i >= 0; --i) {
13042  for (int j = 1; j >= 0; --j) {
13043  SDValue O0 = N0->getOperand(i);
13044  SDValue O1 = N1->getOperand(j);
13045  SDValue Sub, Add, SubSibling, AddSibling;
13046 
13047  // Find a SUB and an ADD operand, one from each AND.
13048  if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
13049  Sub = O0;
13050  Add = O1;
13051  SubSibling = N0->getOperand(1 - i);
13052  AddSibling = N1->getOperand(1 - j);
13053  } else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
13054  Add = O0;
13055  Sub = O1;
13056  AddSibling = N0->getOperand(1 - i);
13057  SubSibling = N1->getOperand(1 - j);
13058  } else
13059  continue;
13060 
13061  if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
13062  continue;
13063 
13064  // The all-ones constant is always the right-hand operand of the Add.
13065  if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
13066  continue;
13067 
13068  if (Sub.getOperand(1) != Add.getOperand(0))
13069  continue;
13070 
13071  return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
13072  }
13073  }
13074 
13075  // (or (and a b) (and (not a) c)) => (bsl a b c)
13076  // We only have to look for constant vectors here since the general, variable
13077  // case can be handled in TableGen.
13078  unsigned Bits = VT.getScalarSizeInBits();
13079  uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
13080  for (int i = 1; i >= 0; --i)
13081  for (int j = 1; j >= 0; --j) {
13082  BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
13083  BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
13084  if (!BVN0 || !BVN1)
13085  continue;
13086 
13087  bool FoundMatch = true;
13088  for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
13089  ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
13090  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
13091  if (!CN0 || !CN1 ||
13092  CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
13093  FoundMatch = false;
13094  break;
13095  }
13096  }
13097 
13098  if (FoundMatch)
13099  return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
13100  N0->getOperand(1 - i), N1->getOperand(1 - j));
13101  }
13102 
13103  return SDValue();
13104 }
13105 
13106 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13107  const AArch64Subtarget *Subtarget) {
13108  // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
13109  SelectionDAG &DAG = DCI.DAG;
13110  EVT VT = N->getValueType(0);
13111 
13112  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
13113  return SDValue();
13114 
13115  if (SDValue Res = tryCombineToEXTR(N, DCI))
13116  return Res;
13117 
13118  if (SDValue Res = tryCombineToBSL(N, DCI))
13119  return Res;
13120 
13121  return SDValue();
13122 }
13123 
13124 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) {
13125  if (!MemVT.getVectorElementType().isSimple())
13126  return false;
13127 
13128  uint64_t MaskForTy = 0ull;
13129  switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) {
13130  case MVT::i8:
13131  MaskForTy = 0xffull;
13132  break;
13133  case MVT::i16:
13134  MaskForTy = 0xffffull;
13135  break;
13136  case MVT::i32:
13137  MaskForTy = 0xffffffffull;
13138  break;
13139  default:
13140  return false;
13141  break;
13142  }
13143 
13144  if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
13145  if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
13146  return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
13147 
13148  return false;
13149 }
13150 
13151 static SDValue performSVEAndCombine(SDNode *N,
13152  TargetLowering::DAGCombinerInfo &DCI) {
13153  if (DCI.isBeforeLegalizeOps())
13154  return SDValue();
13155 
13156  SelectionDAG &DAG = DCI.DAG;
13157  SDValue Src = N->getOperand(0);
13158  unsigned Opc = Src->getOpcode();
13159 
13160  // Zero/any extend of an unsigned unpack
13161  if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
13162  SDValue UnpkOp = Src->getOperand(0);
13163  SDValue Dup = N->getOperand(1);
13164 
13165  if (Dup.getOpcode() != AArch64ISD::DUP)
13166  return SDValue();
13167 
13168  SDLoc DL(N);
13169  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
13170  uint64_t ExtVal = C->getZExtValue();
13171 
13172  // If the mask is fully covered by the unpack, we don't need to push
13173  // a new AND onto the operand
13174  EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
13175  if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
13176  (ExtVal == 0xFFFF && EltTy == MVT::i16) ||
13177  (ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
13178  return Src;
13179 
13180  // Truncate to prevent a DUP with an over-wide constant
13181  APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
13182 
13183  // Otherwise, make sure we propagate the AND to the operand
13184  // of the unpack
13185  Dup = DAG.getNode(AArch64ISD::DUP, DL,
13186  UnpkOp->getValueType(0),
13187  DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
13188 
13189  SDValue And = DAG.getNode(ISD::AND, DL,
13190  UnpkOp->getValueType(0), UnpkOp, Dup);
13191 
13192  return DAG.getNode(Opc, DL, N->getValueType(0), And);
13193  }
13194 
13195  if (!EnableCombineMGatherIntrinsics)
13196  return SDValue();
13197 
13198  SDValue Mask = N->getOperand(1);
13199 
13200  if (!Src.hasOneUse())
13201  return SDValue();
13202 
13203  EVT MemVT;
13204 
13205  // SVE load instructions perform an implicit zero-extend, which makes them
13206  // perfect candidates for combining.
13207  switch (Opc) {
13208  case AArch64ISD::LD1_MERGE_ZERO:
13209  case AArch64ISD::LDNF1_MERGE_ZERO:
13210  case AArch64ISD::LDFF1_MERGE_ZERO:
13211  MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
13212  break;
13213  case AArch64ISD::GLD1_MERGE_ZERO:
13214  case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
13215  case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
13216  case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
13217  case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
13218  case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
13219  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
13220  case AArch64ISD::GLDFF1_MERGE_ZERO:
13221  case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
13222  case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
13223  case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
13224  case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
13225  case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
13226  case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
13227  case AArch64ISD::GLDNT1_MERGE_ZERO:
13228  MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
13229  break;
13230  default:
13231  return SDValue();
13232  }
13233 
13234  if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
13235  return Src;
13236 
13237  return SDValue();
13238 }
13239 
13240 static SDValue performANDCombine(SDNode *N,
13241  TargetLowering::DAGCombinerInfo &DCI) {
13242  SelectionDAG &DAG = DCI.DAG;
13243  SDValue LHS = N->getOperand(0);
13244  EVT VT = N->getValueType(0);
13245  if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13246  return SDValue();
13247 
13248  if (VT.isScalableVector())
13249  return performSVEAndCombine(N, DCI);
13250 
13251  // The combining code below works only for NEON vectors. In particular, it
13252  // does not work for SVE when dealing with vectors wider than 128 bits.
13253  if (!(VT.is64BitVector() || VT.is128BitVector()))
13254  return SDValue();
13255 
13256  BuildVectorSDNode *BVN =
13257  dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
13258  if (!BVN)
13259  return SDValue();
13260 
13261  // AND does not accept an immediate, so check if we can use a BIC immediate
13262  // instruction instead. We do this here instead of using a (and x, (mvni imm))
13263  // pattern in isel, because some immediates may be lowered to the preferred
13264  // (and x, (movi imm)) form, even though an mvni representation also exists.
13265  APInt DefBits(VT.getSizeInBits(), 0);
13266  APInt UndefBits(VT.getSizeInBits(), 0);
13267  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
13268  SDValue NewOp;
13269 
13270  DefBits = ~DefBits;
13271  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
13272  DefBits, &LHS)) ||
13273  (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
13274  DefBits, &LHS)))
13275  return NewOp;
13276 
13277  UndefBits = ~UndefBits;
13278  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
13279  UndefBits, &LHS)) ||
13280  (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
13281  UndefBits, &LHS)))
13282  return NewOp;
13283  }
13284 
13285  return SDValue();
13286 }
13287 
13288 static SDValue performSRLCombine(SDNode *N,
13289  TargetLowering::DAGCombinerInfo &DCI) {
13290  SelectionDAG &DAG = DCI.DAG;
13291  EVT VT = N->getValueType(0);
13292  if (VT != MVT::i32 && VT != MVT::i64)
13293  return SDValue();
13294 
13295  // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
13296  // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
13297  // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
13298  SDValue N0 = N->getOperand(0);
13299  if (N0.getOpcode() == ISD::BSWAP) {
13300  SDLoc DL(N);
13301  SDValue N1 = N->getOperand(1);
13302  SDValue N00 = N0.getOperand(0);
13303  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
13304  uint64_t ShiftAmt = C->getZExtValue();
13305  if (VT == MVT::i32 && ShiftAmt == 16 &&
13306  DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
13307  return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
13308  if (VT == MVT::i64 && ShiftAmt == 32 &&
13309  DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
13310  return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
13311  }
13312  }
13313  return SDValue();
13314 }
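// Illustrative sketch (editorial addition, using the GCC/Clang
// __builtin_bswap32 builtin): why the canonicalization above is sound. When
// the high 16 bits of X are zero, the low 16 bits of bswap(X) are zero, so
// the bits a rotate would bring back in are all zero and srl == rotr.
static bool exampleSrlBswapEqualsRotr(uint32_t X) {
  if (X >> 16)
    return true; // precondition fails; the combine would not fire
  uint32_t B = __builtin_bswap32(X);
  return (B >> 16) == ((B >> 16) | (B << 16)); // srl 16 == rotr 16
}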
13315 
13316 // Attempt to form urhadd(OpA, OpB) from
13317 // truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
13318 // or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
13319 // The original form of the first expression is
13320 // truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
13321 // (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
13322 // Before this function is called the srl will have been lowered to
13323 // AArch64ISD::VLSHR.
13324 // This pass can also recognize signed variants of the patterns that use sign
13325 // extension instead of zero extension and form a srhadd(OpA, OpB) or a
13326 // shadd(OpA, OpB) from them.
13327 static SDValue
13328 performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13329  SelectionDAG &DAG) {
13330  EVT VT = N->getValueType(0);
13331 
13332  // Since we are looking for a right shift by a constant value of 1 and we are
13333  // operating on types at least 16 bits in length (sign/zero extended OpA and
13334  // OpB, which are at least 8 bits), it follows that the truncate will always
13335  // discard the shifted-in bit and therefore the right shift will be logical
13336  // regardless of the signedness of OpA and OpB.
13337  SDValue Shift = N->getOperand(0);
13338  if (Shift.getOpcode() != AArch64ISD::VLSHR)
13339  return SDValue();
13340 
13341  // Is the right shift using an immediate value of 1?
13342  uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
13343  if (ShiftAmount != 1)
13344  return SDValue();
13345 
13346  SDValue ExtendOpA, ExtendOpB;
13347  SDValue ShiftOp0 = Shift.getOperand(0);
13348  unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
13349  if (ShiftOp0Opc == ISD::SUB) {
13350 
13351  SDValue Xor = ShiftOp0.getOperand(1);
13352  if (Xor.getOpcode() != ISD::XOR)
13353  return SDValue();
13354 
13355  // Is the XOR using a constant amount of all ones in the right hand side?
13356  uint64_t C;
13357  if (!isAllConstantBuildVector(Xor.getOperand(1), C))
13358  return SDValue();
13359 
13360  unsigned ElemSizeInBits = VT.getScalarSizeInBits();
13361  APInt CAsAPInt(ElemSizeInBits, C);
13362  if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
13363  return SDValue();
13364 
13365  ExtendOpA = Xor.getOperand(0);
13366  ExtendOpB = ShiftOp0.getOperand(0);
13367  } else if (ShiftOp0Opc == ISD::ADD) {
13368  ExtendOpA = ShiftOp0.getOperand(0);
13369  ExtendOpB = ShiftOp0.getOperand(1);
13370  } else
13371  return SDValue();
13372 
13373  unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
13374  unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
13375  if (!(ExtendOpAOpc == ExtendOpBOpc &&
13376  (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
13377  return SDValue();
13378 
13379  // Is the result of the right shift being truncated to the same value type as
13380  // the original operands, OpA and OpB?
13381  SDValue OpA = ExtendOpA.getOperand(0);
13382  SDValue OpB = ExtendOpB.getOperand(0);
13383  EVT OpAVT = OpA.getValueType();
13384  assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
13385  if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
13386  return SDValue();
13387 
13388  SDLoc DL(N);
13389  bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
13390  bool IsRHADD = ShiftOp0Opc == ISD::SUB;
13391  unsigned HADDOpc = IsSignExtend
13392  ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
13393  : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
13394  SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
13395 
13396  return ResultHADD;
13397 }
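// Illustrative sketch (editorial addition, not from the source): scalar
// reference for the two halving-add forms recognized above, computed in a
// wider type exactly as the zero-extended DAG pattern is.
static uint8_t exampleUhadd(uint8_t A, uint8_t B) {
  return (uint8_t)(((uint16_t)A + B) >> 1); // truncate(vlshr(add, 1))
}
static uint8_t exampleUrhadd(uint8_t A, uint8_t B) {
  uint16_t NotA = (uint16_t)(A ^ 0xFFFF); // xor(zext(A), ones)
  uint16_t Diff = (uint16_t)(B - NotA);   // == A + B + 1
  return (uint8_t)(Diff >> 1);            // truncate(vlshr(sub, 1))
}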
13398 
13399 static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
13400  switch (Opcode) {
13401  case ISD::FADD:
13402  return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
13403  case ISD::ADD:
13404  return VT == MVT::i64;
13405  default:
13406  return false;
13407  }
13408 }
13409 
13410 static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
13411  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13412  ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
13413 
13414  EVT VT = N->getValueType(0);
13415  const bool FullFP16 =
13416  static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
13417 
13418  // Rewrite for pairwise fadd pattern
13419  // (f32 (extract_vector_elt
13420  // (fadd (vXf32 Other)
13421  // (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
13422  // ->
13423  // (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
13424  // (extract_vector_elt (vXf32 Other) 1))
13425  if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
13426  hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) {
13427  SDLoc DL(N0);
13428  SDValue N00 = N0->getOperand(0);
13429  SDValue N01 = N0->getOperand(1);
13430 
13431  ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
13432  SDValue Other = N00;
13433 
13434  // And handle the commutative case.
13435  if (!Shuffle) {
13436  Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
13437  Other = N01;
13438  }
13439 
13440  if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
13441  Other == Shuffle->getOperand(0)) {
13442  return DAG.getNode(N0->getOpcode(), DL, VT,
13443  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
13444  DAG.getConstant(0, DL, MVT::i64)),
13445  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
13446  DAG.getConstant(1, DL, MVT::i64)));
13447  }
13448  }
13449 
13450  return SDValue();
13451 }
13452 
13453 static SDValue performConcatVectorsCombine(SDNode *N,
13454  TargetLowering::DAGCombinerInfo &DCI,
13455  SelectionDAG &DAG) {
13456  SDLoc dl(N);
13457  EVT VT = N->getValueType(0);
13458  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13459  unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
13460 
13461  // Optimize concat_vectors of truncated vectors, where the intermediate
13462  // type is illegal, to avoid said illegality, e.g.,
13463  // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
13464  // (v2i16 (truncate (v2i64)))))
13465  // ->
13466  // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
13467  // (v4i32 (bitcast (v2i64))),
13468  // <0, 2, 4, 6>)))
13469  // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
13470  // on both input and result type, so we might generate worse code.
13471  // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
13472  if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
13473  N1Opc == ISD::TRUNCATE) {
13474  SDValue N00 = N0->getOperand(0);
13475  SDValue N10 = N1->getOperand(0);
13476  EVT N00VT = N00.getValueType();
13477 
13478  if (N00VT == N10.getValueType() &&
13479  (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
13480  N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
13481  MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
13482  SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
13483  for (size_t i = 0; i < Mask.size(); ++i)
13484  Mask[i] = i * 2;
13485  return DAG.getNode(ISD::TRUNCATE, dl, VT,
13486  DAG.getVectorShuffle(
13487  MidVT, dl,
13488  DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
13489  DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
13490  }
13491  }
13492 
13493  // Wait 'til after everything is legalized to try this. That way we have
13494  // legal vector types and such.
13495  if (DCI.isBeforeLegalizeOps())
13496  return SDValue();
13497 
13498  // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
13499  // subvectors from the same original vectors. Combine these into a single
13500  // [us]rhadd or [us]hadd that operates on the two original vectors. Example:
13501  // (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
13502  // extract_subvector (v16i8 OpB,
13503  // <0>))),
13504  // (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>),
13505  // extract_subvector (v16i8 OpB,
13506  // <8>)))))
13507  // ->
13508  // (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
13509  if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
13510  (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
13511  N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
13512  SDValue N00 = N0->getOperand(0);
13513  SDValue N01 = N0->getOperand(1);
13514  SDValue N10 = N1->getOperand(0);
13515  SDValue N11 = N1->getOperand(1);
13516 
13517  EVT N00VT = N00.getValueType();
13518  EVT N10VT = N10.getValueType();
13519 
13520  if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13521  N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13522  N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13523  N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
13524  SDValue N00Source = N00->getOperand(0);
13525  SDValue N01Source = N01->getOperand(0);
13526  SDValue N10Source = N10->getOperand(0);
13527  SDValue N11Source = N11->getOperand(0);
13528 
13529  if (N00Source == N10Source && N01Source == N11Source &&
13530  N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
13531  assert(N0.getValueType() == N1.getValueType());
13532 
13533  uint64_t N00Index = N00.getConstantOperandVal(1);
13534  uint64_t N01Index = N01.getConstantOperandVal(1);
13535  uint64_t N10Index = N10.getConstantOperandVal(1);
13536  uint64_t N11Index = N11.getConstantOperandVal(1);
13537 
13538  if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
13539  N10Index == N00VT.getVectorNumElements())
13540  return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
13541  }
13542  }
13543  }
13544 
13545  // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
13546  // splat. The indexed instructions are going to be expecting a DUPLANE64, so
13547  // canonicalise to that.
13548  if (N0 == N1 && VT.getVectorNumElements() == 2) {
13549  assert(VT.getScalarSizeInBits() == 64);
13550  return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
13551  DAG.getConstant(0, dl, MVT::i64));
13552  }
13553 
13554  // Canonicalise concat_vectors so that the right-hand vector has as few
13555  // bit-casts as possible before its real operation. The primary matching
13556  // destination for these operations will be the narrowing "2" instructions,
13557  // which depend on the operation being performed on this right-hand vector.
13558  // For example,
13559  // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
13560  // becomes
13561  // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
13562 
13563  if (N1Opc != ISD::BITCAST)
13564  return SDValue();
13565  SDValue RHS = N1->getOperand(0);
13566  MVT RHSTy = RHS.getValueType().getSimpleVT();
13567  // If the RHS is not a vector, this is not the pattern we're looking for.
13568  if (!RHSTy.isVector())
13569  return SDValue();
13570 
13571  LLVM_DEBUG(
13572  dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
13573 
13574  MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
13575  RHSTy.getVectorNumElements() * 2);
13576  return DAG.getNode(ISD::BITCAST, dl, VT,
13577  DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
13578  DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
13579  RHS));
13580 }
13581 
13582 static SDValue tryCombineFixedPointConvert(SDNode *N,
13583                                            TargetLowering::DAGCombinerInfo &DCI,
13584                                            SelectionDAG &DAG) {
13585  // Wait until after everything is legalized to try this. That way we have
13586  // legal vector types and such.
13587  if (DCI.isBeforeLegalizeOps())
13588  return SDValue();
13589  // Transform a scalar conversion of a value from a lane extract into a
13590  // lane extract of a vector conversion. E.g., from foo1 to foo2:
13591  // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
13592  // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
13593  //
13594  // The second form interacts better with instruction selection and the
13595  // register allocator to avoid cross-class register copies that aren't
13596  // coalescable due to a lane reference.
13597 
13598  // Check the operand and see if it originates from a lane extract.
13599  SDValue Op1 = N->getOperand(1);
13600  if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13601  // Yep, no additional predication needed. Perform the transform.
13602  SDValue IID = N->getOperand(0);
13603  SDValue Shift = N->getOperand(2);
13604  SDValue Vec = Op1.getOperand(0);
13605  SDValue Lane = Op1.getOperand(1);
13606  EVT ResTy = N->getValueType(0);
13607  EVT VecResTy;
13608  SDLoc DL(N);
13609 
13610  // The vector width should be 128 bits by the time we get here, even
13611  // if it started as 64 bits (the extract_vector handling will have
13612  // widened it).
13613  assert(Vec.getValueSizeInBits() == 128 &&
13614  "unexpected vector size on extract_vector_elt!");
13615  if (Vec.getValueType() == MVT::v4i32)
13616  VecResTy = MVT::v4f32;
13617  else if (Vec.getValueType() == MVT::v2i64)
13618  VecResTy = MVT::v2f64;
13619  else
13620  llvm_unreachable("unexpected vector type!");
13621 
13622  SDValue Convert =
13623  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
13624  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
13625  }
13626  return SDValue();
13627 }
13628 
13629 // AArch64 high-vector "long" operations are formed by performing the non-high
13630 // version on an extract_subvector of each operand which gets the high half:
13631 //
13632 // (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
13633 //
13634 // However, there are cases which don't have an extract_high explicitly, but
13635 // have another operation that can be made compatible with one for free. For
13636 // example:
13637 //
13638 // (dupv64 scalar) --> (extract_high (dup128 scalar))
13639 //
13640 // This routine does the actual conversion of such DUPs, once outer routines
13641 // have determined that everything else is in order.
13642 // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
13643 // similarly here.
13644 static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
13645  switch (N.getOpcode()) {
13646  case AArch64ISD::DUP:
13647  case AArch64ISD::DUPLANE8:
13648  case AArch64ISD::DUPLANE16:
13649  case AArch64ISD::DUPLANE32:
13650  case AArch64ISD::DUPLANE64:
13651  case AArch64ISD::MOVI:
13652  case AArch64ISD::MOVIshift:
13653  case AArch64ISD::MOVIedit:
13654  case AArch64ISD::MOVImsl:
13655  case AArch64ISD::MVNIshift:
13656  case AArch64ISD::MVNImsl:
13657  break;
13658  default:
13659  // FMOV could be supported, but isn't very useful, as it would only occur
13660  // if you passed a bitcast'd floating point immediate to an eligible long
13661  // integer op (addl, smull, ...).
13662  return SDValue();
13663  }
13664 
13665  MVT NarrowTy = N.getSimpleValueType();
13666  if (!NarrowTy.is64BitVector())
13667  return SDValue();
13668 
13669  MVT ElementTy = NarrowTy.getVectorElementType();
13670  unsigned NumElems = NarrowTy.getVectorNumElements();
13671  MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
13672 
13673  SDLoc dl(N);
13674  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
13675  DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
13676  DAG.getConstant(NumElems, dl, MVT::i64));
13677 }
13678 
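// Returns true if N is (possibly after looking through a bitcast) an
// EXTRACT_SUBVECTOR that takes the high half of its fixed-width source
// vector.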
13679 static bool isEssentiallyExtractHighSubvector(SDValue N) {
13680   if (N.getOpcode() == ISD::BITCAST)
13681  N = N.getOperand(0);
13682  if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13683  return false;
13684  if (N.getOperand(0).getValueType().isScalableVector())
13685  return false;
13686  return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
13687  N.getOperand(0).getValueType().getVectorNumElements() / 2;
13688 }
13689 
13690 /// Helper structure to keep track of ISD::SET_CC operands.
13691 struct GenericSetCCInfo {
13692   const SDValue *Opnd0;
13693   const SDValue *Opnd1;
13694   ISD::CondCode CC;
13695 };
13696 
13697 /// Helper structure to keep track of a SET_CC lowered into AArch64 code.
13698 struct AArch64SetCCInfo {
13699   const SDValue *Cmp;
13700   AArch64CC::CondCode CC;
13701 };
13702 
13703 /// Helper structure to keep track of SetCC information.
13704 union SetCCInfo {
13705   GenericSetCCInfo Generic;
13706   AArch64SetCCInfo AArch64;
13707 };
13708 
13709 /// Helper structure to be able to read SetCC information. If the
13710 /// IsAArch64 field is set to true, Info is an AArch64SetCCInfo; otherwise
13711 /// Info is a GenericSetCCInfo.
13712 struct SetCCInfoAndKind {
13713   SetCCInfo Info;
13714   bool IsAArch64;
13715 };
13716 
13717 /// Check whether or not \p Op is a SET_CC operation, either a generic or
13718 /// an
13719 /// AArch64 lowered one.
13720 /// \p SetCCInfo is filled accordingly.
13721 /// \post SetCCInfo is meaningful only when this function returns true.
13722 /// \return True when Op is a kind of SET_CC operation.
13723 static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
13724   // If this is a setcc, this is straightforward.
13725  if (Op.getOpcode() == ISD::SETCC) {
13726  SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
13727  SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
13728  SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13729  SetCCInfo.IsAArch64 = false;
13730  return true;
13731  }
13732  // Otherwise, check if this is a matching csel instruction.
13733  // In other words:
13734  // - csel 1, 0, cc
13735  // - csel 0, 1, !cc
13736  if (Op.getOpcode() != AArch64ISD::CSEL)
13737  return false;
13738  // Set the information about the operands.
13739  // TODO: we want the operands of the Cmp not the csel
13740  SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
13741  SetCCInfo.IsAArch64 = true;
13742  SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
13743  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
13744 
13745   // Check that the operands match the constraints:
13746  // (1) Both operands must be constants.
13747  // (2) One must be 1 and the other must be 0.
13748  ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
13749  ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
13750 
13751  // Check (1).
13752  if (!TValue || !FValue)
13753  return false;
13754 
13755  // Check (2).
13756  if (!TValue->isOne()) {
13757  // Update the comparison when we are interested in !cc.
13758  std::swap(TValue, FValue);
13759     SetCCInfo.Info.AArch64.CC =
13760         AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
13761   }
13762  return TValue->isOne() && FValue->isNullValue();
13763 }
13764 
13765 // Returns true if Op is setcc or zext of setcc.
13766 static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
13767  if (isSetCC(Op, Info))
13768  return true;
13769  return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
13770  isSetCC(Op->getOperand(0), Info));
13771 }
13772 
13773 // The folding we want to perform is:
13774 // (add x, [zext] (setcc cc ...) )
13775 // -->
13776 // (csel x, (add x, 1), !cc ...)
13777 //
13778 // The latter will get matched to a CSINC instruction.
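// As an illustration (hypothetical source, assuming the fold fires), C code
// such as:
//   int f(int x, int a, int b) { return x + (a < b); }
// should lower to a compare followed by a single csinc/cinc instead of
// materialising the setcc result and adding it.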
13780  assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
13781  SDValue LHS = Op->getOperand(0);
13782  SDValue RHS = Op->getOperand(1);
13783  SetCCInfoAndKind InfoAndKind;
13784 
13785  // If both operands are a SET_CC, then we don't want to perform this
13786  // folding and create another csel as this results in more instructions
13787  // (and higher register usage).
13788  if (isSetCCOrZExtSetCC(LHS, InfoAndKind) &&
13789  isSetCCOrZExtSetCC(RHS, InfoAndKind))
13790  return SDValue();
13791 
13792  // If neither operand is a SET_CC, give up.
13793  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
13794  std::swap(LHS, RHS);
13795  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
13796  return SDValue();
13797  }
13798 
13799   // FIXME: This could be generalized to work for FP comparisons.
13800  EVT CmpVT = InfoAndKind.IsAArch64
13801  ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
13802  : InfoAndKind.Info.Generic.Opnd0->getValueType();
13803  if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
13804  return SDValue();
13805 
13806  SDValue CCVal;
13807  SDValue Cmp;
13808  SDLoc dl(Op);
13809  if (InfoAndKind.IsAArch64) {
13810  CCVal = DAG.getConstant(
13811  AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
13812  MVT::i32);
13813  Cmp = *InfoAndKind.Info.AArch64.Cmp;
13814  } else
13815  Cmp = getAArch64Cmp(
13816  *InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
13817  ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
13818  dl);
13819 
13820  EVT VT = Op->getValueType(0);
13821  LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
13822  return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
13823 }
13824 
13825 // ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
13826 static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
13827  EVT VT = N->getValueType(0);
13828  // Only scalar integer and vector types.
13829  if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
13830  return SDValue();
13831 
13832  SDValue LHS = N->getOperand(0);
13833  SDValue RHS = N->getOperand(1);
13834  if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13835  RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
13836  return SDValue();
13837 
13838  auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
13839  auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
13840  if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
13841  return SDValue();
13842 
13843  SDValue Op1 = LHS->getOperand(0);
13844  SDValue Op2 = RHS->getOperand(0);
13845  EVT OpVT1 = Op1.getValueType();
13846  EVT OpVT2 = Op2.getValueType();
13847  if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
13848  Op2.getOpcode() != AArch64ISD::UADDV ||
13849  OpVT1.getVectorElementType() != VT)
13850  return SDValue();
13851 
13852  SDValue Val1 = Op1.getOperand(0);
13853  SDValue Val2 = Op2.getOperand(0);
13854  EVT ValVT = Val1->getValueType(0);
13855  SDLoc DL(N);
13856  SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
13857  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
13858  DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
13859  DAG.getConstant(0, DL, MVT::i64));
13860 }
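// Illustrative NEON-intrinsics view of the fold above (an assumption, not
// spelled out in the original source): "vaddvq_u32(a) + vaddvq_u32(b)" can
// be computed as "vaddvq_u32(vaddq_u32(a, b))", i.e. one across-lanes
// reduction instead of two.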
13861 
13862 // ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
13863 static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
13864  EVT VT = N->getValueType(0);
13865  if (N->getOpcode() != ISD::ADD)
13866  return SDValue();
13867 
13868  SDValue Dot = N->getOperand(0);
13869  SDValue A = N->getOperand(1);
13870   // Handle commutativity
13871  auto isZeroDot = [](SDValue Dot) {
13872  return (Dot.getOpcode() == AArch64ISD::UDOT ||
13873  Dot.getOpcode() == AArch64ISD::SDOT) &&
13874  isZerosVector(Dot.getOperand(0).getNode());
13875  };
13876  if (!isZeroDot(Dot))
13877  std::swap(Dot, A);
13878  if (!isZeroDot(Dot))
13879  return SDValue();
13880 
13881  return DAG.getNode(Dot.getOpcode(), SDLoc(N), VT, A, Dot.getOperand(1),
13882  Dot.getOperand(2));
13883 }
13884 
13885 // The basic add/sub long vector instructions have variants with "2" on the end
13886 // which act on the high-half of their inputs. They are normally matched by
13887 // patterns like:
13888 //
13889 // (add (zeroext (extract_high LHS)),
13890 // (zeroext (extract_high RHS)))
13891 // -> uaddl2 vD, vN, vM
13892 //
13893 // However, if one of the extracts is something like a duplicate, this
13894 // instruction can still be used profitably. This function puts the DAG into a
13895 // more appropriate form for those patterns to trigger.
13896 static SDValue performAddSubLongCombine(SDNode *N,
13897                                         TargetLowering::DAGCombinerInfo &DCI,
13898                                         SelectionDAG &DAG) {
13899  if (DCI.isBeforeLegalizeOps())
13900  return SDValue();
13901 
13902  MVT VT = N->getSimpleValueType(0);
13903  if (!VT.is128BitVector()) {
13904  if (N->getOpcode() == ISD::ADD)
13905  return performSetccAddFolding(N, DAG);
13906  return SDValue();
13907  }
13908 
13909  // Make sure both branches are extended in the same way.
13910  SDValue LHS = N->getOperand(0);
13911  SDValue RHS = N->getOperand(1);
13912  if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
13913  LHS.getOpcode() != ISD::SIGN_EXTEND) ||
13914  LHS.getOpcode() != RHS.getOpcode())
13915  return SDValue();
13916 
13917  unsigned ExtType = LHS.getOpcode();
13918 
13919  // It's not worth doing if at least one of the inputs isn't already an
13920  // extract, but we don't know which it'll be so we have to try both.
13921   if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
13922  RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
13923  if (!RHS.getNode())
13924  return SDValue();
13925 
13926  RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
13927  } else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
13928  LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
13929  if (!LHS.getNode())
13930  return SDValue();
13931 
13932  LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
13933  }
13934 
13935  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
13936 }
13937 
13938 static SDValue performAddSubCombine(SDNode *N,
13939                                     TargetLowering::DAGCombinerInfo &DCI,
13940                                     SelectionDAG &DAG) {
13941  // Try to change sum of two reductions.
13942  if (SDValue Val = performUADDVCombine(N, DAG))
13943  return Val;
13944  if (SDValue Val = performAddDotCombine(N, DAG))
13945  return Val;
13946 
13947  return performAddSubLongCombine(N, DCI, DAG);
13948 }
13949 
13950 // Massage DAGs which we can use the high-half "long" operations on into
13951 // something isel will recognize better. E.g.
13952 //
13953 // (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
13954 // (aarch64_neon_umull (extract_high (v2i64 vec)))
13955 // (extract_high (v2i64 (dup128 scalar)))))
13956 //
13957 static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
13958                                        TargetLowering::DAGCombinerInfo &DCI,
13959                                        SelectionDAG &DAG) {
13960  if (DCI.isBeforeLegalizeOps())
13961  return SDValue();
13962 
13963  SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
13964  SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
13965  assert(LHS.getValueType().is64BitVector() &&
13966  RHS.getValueType().is64BitVector() &&
13967  "unexpected shape for long operation");
13968 
13969  // Either node could be a DUP, but it's not worth doing both of them (you'd
13970  // just as well use the non-high version) so look for a corresponding extract
13971  // operation on the other "wing".
13972   if (isEssentiallyExtractHighSubvector(LHS)) {
13973  RHS = tryExtendDUPToExtractHigh(RHS, DAG);
13974  if (!RHS.getNode())
13975  return SDValue();
13976  } else if (isEssentiallyExtractHighSubvector(RHS)) {
13977  LHS = tryExtendDUPToExtractHigh(LHS, DAG);
13978  if (!LHS.getNode())
13979  return SDValue();
13980  }
13981 
13982  if (IID == Intrinsic::not_intrinsic)
13983  return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
13984 
13985  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
13986  N->getOperand(0), LHS, RHS);
13987 }
13988 
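// Try to replace a vector shift intrinsic whose shift amount is a constant
// (either a splat build_vector or a scalar constant) with the equivalent
// target-specific immediate-shift node.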
13989 static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
13990  MVT ElemTy = N->getSimpleValueType(0).getScalarType();
13991  unsigned ElemBits = ElemTy.getSizeInBits();
13992 
13993  int64_t ShiftAmount;
13994  if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
13995  APInt SplatValue, SplatUndef;
13996  unsigned SplatBitSize;
13997  bool HasAnyUndefs;
13998  if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
13999  HasAnyUndefs, ElemBits) ||
14000  SplatBitSize != ElemBits)
14001  return SDValue();
14002 
14003  ShiftAmount = SplatValue.getSExtValue();
14004  } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
14005  ShiftAmount = CVN->getSExtValue();
14006  } else
14007  return SDValue();
14008 
14009  unsigned Opcode;
14010  bool IsRightShift;
14011  switch (IID) {
14012  default:
14013  llvm_unreachable("Unknown shift intrinsic");
14014  case Intrinsic::aarch64_neon_sqshl:
14015  Opcode = AArch64ISD::SQSHL_I;
14016  IsRightShift = false;
14017  break;
14018  case Intrinsic::aarch64_neon_uqshl:
14019  Opcode = AArch64ISD::UQSHL_I;
14020  IsRightShift = false;
14021  break;
14022  case Intrinsic::aarch64_neon_srshl:
14023  Opcode = AArch64ISD::SRSHR_I;
14024  IsRightShift = true;
14025  break;
14026  case Intrinsic::aarch64_neon_urshl:
14027  Opcode = AArch64ISD::URSHR_I;
14028  IsRightShift = true;
14029  break;
14030  case Intrinsic::aarch64_neon_sqshlu:
14031  Opcode = AArch64ISD::SQSHLU_I;
14032  IsRightShift = false;
14033  break;
14034  case Intrinsic::aarch64_neon_sshl:
14035  case Intrinsic::aarch64_neon_ushl:
14036  // For positive shift amounts we can use SHL, as ushl/sshl perform a regular
14037  // left shift for positive shift amounts. Below, we only replace the current
14038  // node with VSHL, if this condition is met.
14039  Opcode = AArch64ISD::VSHL;
14040  IsRightShift = false;
14041  break;
14042  }
14043 
14044  if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
14045  SDLoc dl(N);
14046  return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
14047  DAG.getConstant(-ShiftAmount, dl, MVT::i32));
14048  } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
14049  SDLoc dl(N);
14050  return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
14051  DAG.getConstant(ShiftAmount, dl, MVT::i32));
14052  }
14053 
14054  return SDValue();
14055 }
14056 
14057 // The CRC32[BH] instructions ignore the high bits of their data operand. Since
14058 // the intrinsics must be legal and take an i32, this means there's almost
14059 // certainly going to be a zext in the DAG which we can eliminate.
14060 static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
14061  SDValue AndN = N->getOperand(2);
14062  if (AndN.getOpcode() != ISD::AND)
14063  return SDValue();
14064 
14065  ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
14066  if (!CMask || CMask->getZExtValue() != Mask)
14067  return SDValue();
14068 
14069   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
14070  N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
14071 }
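// Sketch of the effect (illustrative): for
//   (crc32b crc, (and data, 0xff))
// the AND is redundant because CRC32B only reads the low 8 bits of its data
// operand, so the node is rebuilt as (crc32b crc, data).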
14072 
14073 static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
14074                                            SelectionDAG &DAG) {
14075  SDLoc dl(N);
14076  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
14077  DAG.getNode(Opc, dl,
14078  N->getOperand(1).getSimpleValueType(),
14079  N->getOperand(1)),
14080  DAG.getConstant(0, dl, MVT::i64));
14081 }
14082 
14083 static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
14084  SDLoc DL(N);
14085  SDValue Op1 = N->getOperand(1);
14086  SDValue Op2 = N->getOperand(2);
14087  EVT ScalarTy = Op2.getValueType();
14088  if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
14089  ScalarTy = MVT::i32;
14090 
14091   // Lower index_vector(base, step) to mul(step, step_vector(1)) + splat(base).
14092  SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
14093  SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
14094  SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
14095  SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
14096  return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
14097 }
14098 
14099 static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
14100  SDLoc dl(N);
14101  SDValue Scalar = N->getOperand(3);
14102  EVT ScalarTy = Scalar.getValueType();
14103 
14104  if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
14105     Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
14106 
14107  SDValue Passthru = N->getOperand(1);
14108  SDValue Pred = N->getOperand(2);
14109  return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
14110  Pred, Scalar, Passthru);
14111 }
14112 
14113 static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
14114  SDLoc dl(N);
14115  LLVMContext &Ctx = *DAG.getContext();
14116  EVT VT = N->getValueType(0);
14117 
14118  assert(VT.isScalableVector() && "Expected a scalable vector.");
14119 
14120  // Current lowering only supports the SVE-ACLE types.
14121   if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
14122  return SDValue();
14123 
14124  unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
14125  unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
14126  EVT ByteVT =
14127       EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
14128 
14129   // Convert everything to the domain of EXT (i.e. bytes).
14130  SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
14131  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
14132  SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
14133  DAG.getConstant(ElemSize, dl, MVT::i32));
14134 
14135  SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
14136  return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
14137 }
14138 
14139 static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
14140                                         TargetLowering::DAGCombinerInfo &DCI,
14141                                         SelectionDAG &DAG) {
14142  if (DCI.isBeforeLegalize())
14143  return SDValue();
14144 
14145  SDValue Comparator = N->getOperand(3);
14146  if (Comparator.getOpcode() == AArch64ISD::DUP ||
14147  Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
14148  unsigned IID = getIntrinsicID(N);
14149  EVT VT = N->getValueType(0);
14150  EVT CmpVT = N->getOperand(2).getValueType();
14151  SDValue Pred = N->getOperand(1);
14152  SDValue Imm;
14153  SDLoc DL(N);
14154 
14155  switch (IID) {
14156  default:
14157  llvm_unreachable("Called with wrong intrinsic!");
14158  break;
14159 
14160  // Signed comparisons
14161  case Intrinsic::aarch64_sve_cmpeq_wide:
14162  case Intrinsic::aarch64_sve_cmpne_wide:
14163  case Intrinsic::aarch64_sve_cmpge_wide:
14164  case Intrinsic::aarch64_sve_cmpgt_wide:
14165  case Intrinsic::aarch64_sve_cmplt_wide:
14166  case Intrinsic::aarch64_sve_cmple_wide: {
14167  if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
14168  int64_t ImmVal = CN->getSExtValue();
14169  if (ImmVal >= -16 && ImmVal <= 15)
14170  Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
14171  else
14172  return SDValue();
14173  }
14174  break;
14175  }
14176  // Unsigned comparisons
14177  case Intrinsic::aarch64_sve_cmphs_wide:
14178  case Intrinsic::aarch64_sve_cmphi_wide:
14179  case Intrinsic::aarch64_sve_cmplo_wide:
14180  case Intrinsic::aarch64_sve_cmpls_wide: {
14181  if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
14182  uint64_t ImmVal = CN->getZExtValue();
14183  if (ImmVal <= 127)
14184  Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
14185  else
14186  return SDValue();
14187  }
14188  break;
14189  }
14190  }
14191 
14192  if (!Imm)
14193  return SDValue();
14194 
14195  SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
14196  return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
14197  N->getOperand(2), Splat, DAG.getCondCode(CC));
14198  }
14199 
14200  return SDValue();
14201 }
14202 
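// Materialise the result of an SVE predicate test as a 0/1 integer: emit a
// PTEST to set the flags, then a CSEL that selects 1 or 0 according to the
// requested flag condition.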
14203 static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
14204                         AArch64CC::CondCode Cond) {
14205   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14206 
14207  SDLoc DL(Op);
14208  assert(Op.getValueType().isScalableVector() &&
14209  TLI.isTypeLegal(Op.getValueType()) &&
14210  "Expected legal scalable vector type!");
14211 
14212   // Ensure target-specific opcodes are using a legal type.
14213  EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
14214  SDValue TVal = DAG.getConstant(1, DL, OutVT);
14215  SDValue FVal = DAG.getConstant(0, DL, OutVT);
14216 
14217  // Set condition code (CC) flags.
14218  SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
14219 
14220  // Convert CC to integer based on requested condition.
14221  // NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
14222   SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
14223  SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
14224  return DAG.getZExtOrTrunc(Res, DL, VT);
14225 }
14226 
14227 static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
14228  SelectionDAG &DAG) {
14229  SDLoc DL(N);
14230 
14231  SDValue Pred = N->getOperand(1);
14232  SDValue VecToReduce = N->getOperand(2);
14233 
14234  // NOTE: The integer reduction's result type is not always linked to the
14235  // operand's element type so we construct it from the intrinsic's result type.
14236  EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
14237  SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
14238 
14239  // SVE reductions set the whole vector register with the first element
14240  // containing the reduction result, which we'll now extract.
14241  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14242  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
14243  Zero);
14244 }
14245 
14246 static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
14247  SelectionDAG &DAG) {
14248  SDLoc DL(N);
14249 
14250  SDValue Pred = N->getOperand(1);
14251  SDValue VecToReduce = N->getOperand(2);
14252 
14253  EVT ReduceVT = VecToReduce.getValueType();
14254  SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
14255 
14256  // SVE reductions set the whole vector register with the first element
14257  // containing the reduction result, which we'll now extract.
14258  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14259  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
14260  Zero);
14261 }
14262 
14263 static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
14264                                             SelectionDAG &DAG) {
14265  SDLoc DL(N);
14266 
14267  SDValue Pred = N->getOperand(1);
14268  SDValue InitVal = N->getOperand(2);
14269  SDValue VecToReduce = N->getOperand(3);
14270  EVT ReduceVT = VecToReduce.getValueType();
14271 
14272  // Ordered reductions use the first lane of the result vector as the
14273  // reduction's initial value.
14274  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14275  InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
14276  DAG.getUNDEF(ReduceVT), InitVal, Zero);
14277 
14278  SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);
14279 
14280  // SVE reductions set the whole vector register with the first element
14281  // containing the reduction result, which we'll now extract.
14282  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
14283  Zero);
14284 }
14285 
14286 static bool isAllActivePredicate(SDValue N) {
14287  unsigned NumElts = N.getValueType().getVectorMinNumElements();
14288 
14289  // Look through cast.
14290  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
14291  N = N.getOperand(0);
14292  // When reinterpreting from a type with fewer elements the "new" elements
14293  // are not active, so bail if they're likely to be used.
14294  if (N.getValueType().getVectorMinNumElements() < NumElts)
14295  return false;
14296  }
14297 
14298  // "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
14299  // or smaller than the implicit element type represented by N.
14300  // NOTE: A larger element count implies a smaller element type.
14301  if (N.getOpcode() == AArch64ISD::PTRUE &&
14302  N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
14303  return N.getValueType().getVectorMinNumElements() >= NumElts;
14304 
14305  return false;
14306 }
14307 
14308 // If a merged operation has no inactive lanes we can relax it to a predicated
14309 // or unpredicated operation, which potentially allows better isel (perhaps
14310 // using immediate forms) or relaxing register reuse requirements.
14311 static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
14312  SelectionDAG &DAG,
14313  bool UnpredOp = false) {
14314  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
14315  assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
14316  SDValue Pg = N->getOperand(1);
14317 
14318  // ISD way to specify an all active predicate.
14319  if (isAllActivePredicate(Pg)) {
14320  if (UnpredOp)
14321  return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), N->getOperand(2),
14322  N->getOperand(3));
14323  else
14324  return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg,
14325  N->getOperand(2), N->getOperand(3));
14326  }
14327 
14328  // FUTURE: SplatVector(true)
14329  return SDValue();
14330 }
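// Illustrative ACLE-level example (an assumption, not from the original
// source): svadd_u32_m(svptrue_b32(), a, b) is governed by an all-active
// predicate, so it can be relaxed here to a plain unpredicated ISD::ADD on
// the two vector operands.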
14331 
14332 static SDValue performIntrinsicCombine(SDNode *N,
14333                                        TargetLowering::DAGCombinerInfo &DCI,
14334                                        const AArch64Subtarget *Subtarget) {
14335  SelectionDAG &DAG = DCI.DAG;
14336  unsigned IID = getIntrinsicID(N);
14337  switch (IID) {
14338  default:
14339  break;
14340  case Intrinsic::aarch64_neon_vcvtfxs2fp:
14341  case Intrinsic::aarch64_neon_vcvtfxu2fp:
14342  return tryCombineFixedPointConvert(N, DCI, DAG);
14343   case Intrinsic::aarch64_neon_saddv:
14344     return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
14345   case Intrinsic::aarch64_neon_uaddv:
14346     return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
14347   case Intrinsic::aarch64_neon_sminv:
14348     return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
14349   case Intrinsic::aarch64_neon_uminv:
14350     return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
14351   case Intrinsic::aarch64_neon_smaxv:
14352     return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
14353   case Intrinsic::aarch64_neon_umaxv:
14354     return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
14355  case Intrinsic::aarch64_neon_fmax:
14356  return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
14357  N->getOperand(1), N->getOperand(2));
14358  case Intrinsic::aarch64_neon_fmin:
14359  return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
14360  N->getOperand(1), N->getOperand(2));
14361  case Intrinsic::aarch64_neon_fmaxnm:
14362  return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
14363  N->getOperand(1), N->getOperand(2));
14364  case Intrinsic::aarch64_neon_fminnm:
14365  return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
14366  N->getOperand(1), N->getOperand(2));
14367  case Intrinsic::aarch64_neon_smull:
14368  case Intrinsic::aarch64_neon_umull:
14369  case Intrinsic::aarch64_neon_pmull:
14370  case Intrinsic::aarch64_neon_sqdmull:
14371  return tryCombineLongOpWithDup(IID, N, DCI, DAG);
14372  case Intrinsic::aarch64_neon_sqshl:
14373  case Intrinsic::aarch64_neon_uqshl:
14374  case Intrinsic::aarch64_neon_sqshlu:
14375  case Intrinsic::aarch64_neon_srshl:
14376  case Intrinsic::aarch64_neon_urshl:
14377  case Intrinsic::aarch64_neon_sshl:
14378  case Intrinsic::aarch64_neon_ushl:
14379  return tryCombineShiftImm(IID, N, DAG);
14380  case Intrinsic::aarch64_crc32b:
14381  case Intrinsic::aarch64_crc32cb:
14382  return tryCombineCRC32(0xff, N, DAG);
14383  case Intrinsic::aarch64_crc32h:
14384  case Intrinsic::aarch64_crc32ch:
14385  return tryCombineCRC32(0xffff, N, DAG);
14386   case Intrinsic::aarch64_sve_saddv:
14387     // There is no i64 version of SADDV because the sign is irrelevant.
14388     if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
14389       return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
14390     else
14391       return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
14392   case Intrinsic::aarch64_sve_uaddv:
14393     return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
14394   case Intrinsic::aarch64_sve_smaxv:
14395     return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
14396   case Intrinsic::aarch64_sve_umaxv:
14397     return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
14398   case Intrinsic::aarch64_sve_sminv:
14399     return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
14400   case Intrinsic::aarch64_sve_uminv:
14401     return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
14402   case Intrinsic::aarch64_sve_orv:
14403     return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
14404   case Intrinsic::aarch64_sve_eorv:
14405     return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
14406   case Intrinsic::aarch64_sve_andv:
14407     return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
14408  case Intrinsic::aarch64_sve_index:
14409  return LowerSVEIntrinsicIndex(N, DAG);
14410  case Intrinsic::aarch64_sve_dup:
14411  return LowerSVEIntrinsicDUP(N, DAG);
14412  case Intrinsic::aarch64_sve_dup_x:
14413  return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
14414  N->getOperand(1));
14415  case Intrinsic::aarch64_sve_ext:
14416  return LowerSVEIntrinsicEXT(N, DAG);
14417   case Intrinsic::aarch64_sve_mul:
14418     return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
14419   case Intrinsic::aarch64_sve_smulh:
14420     return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
14421   case Intrinsic::aarch64_sve_umulh:
14422     return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
14423   case Intrinsic::aarch64_sve_smin:
14424     return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
14425   case Intrinsic::aarch64_sve_umin:
14426     return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
14427   case Intrinsic::aarch64_sve_smax:
14428     return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
14429   case Intrinsic::aarch64_sve_umax:
14430     return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
14431   case Intrinsic::aarch64_sve_lsl:
14432     return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
14433   case Intrinsic::aarch64_sve_lsr:
14434     return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
14435   case Intrinsic::aarch64_sve_asr:
14436     return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
14437   case Intrinsic::aarch64_sve_fadd:
14438     return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
14439   case Intrinsic::aarch64_sve_fsub:
14440     return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
14441   case Intrinsic::aarch64_sve_fmul:
14442     return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
14443  case Intrinsic::aarch64_sve_add:
14444  return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
14445  case Intrinsic::aarch64_sve_sub:
14446  return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
14447  case Intrinsic::aarch64_sve_and:
14448  return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
14449  case Intrinsic::aarch64_sve_bic:
14450  return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
14451  case Intrinsic::aarch64_sve_eor:
14452  return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
14453  case Intrinsic::aarch64_sve_orr:
14454  return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
14455  case Intrinsic::aarch64_sve_sqadd:
14456  return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
14457  case Intrinsic::aarch64_sve_sqsub:
14458  return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
14459  case Intrinsic::aarch64_sve_uqadd:
14460  return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
14461  case Intrinsic::aarch64_sve_uqsub:
14462  return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
14463  case Intrinsic::aarch64_sve_sqadd_x:
14464  return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
14465  N->getOperand(1), N->getOperand(2));
14466  case Intrinsic::aarch64_sve_sqsub_x:
14467  return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
14468  N->getOperand(1), N->getOperand(2));
14469  case Intrinsic::aarch64_sve_uqadd_x:
14470  return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
14471  N->getOperand(1), N->getOperand(2));
14472  case Intrinsic::aarch64_sve_uqsub_x:
14473  return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
14474  N->getOperand(1), N->getOperand(2));
14475  case Intrinsic::aarch64_sve_cmphs:
14476  if (!N->getOperand(2).getValueType().isFloatingPoint())
14477       return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14478  N->getValueType(0), N->getOperand(1), N->getOperand(2),
14479  N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
14480  break;
14481  case Intrinsic::aarch64_sve_cmphi:
14482  if (!N->getOperand(2).getValueType().isFloatingPoint())
14483       return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14484  N->getValueType(0), N->getOperand(1), N->getOperand(2),
14485  N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
14486  break;
14487  case Intrinsic::aarch64_sve_fcmpge:
14488  case Intrinsic::aarch64_sve_cmpge:
14489     return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14490  N->getValueType(0), N->getOperand(1), N->getOperand(2),
14491  N->getOperand(3), DAG.getCondCode(ISD::SETGE));
14492  break;
14493  case Intrinsic::aarch64_sve_fcmpgt:
14494  case Intrinsic::aarch64_sve_cmpgt:
14495     return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14496  N->getValueType(0), N->getOperand(1), N->getOperand(2),
14497  N->getOperand(3), DAG.getCondCode(ISD::SETGT));
14498  break;
14499  case Intrinsic::aarch64_sve_fcmpeq:
14500  case Intrinsic::aarch64_sve_cmpeq:
14501     return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14502  N->getValueType(0), N->getOperand(1), N->getOperand(2),
14503  N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
14504  break;
14505  case Intrinsic::aarch64_sve_fcmpne:
14506  case Intrinsic::aarch64_sve_cmpne:
14507     return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14508  N->getValueType(0), N->getOperand(1), N->getOperand(2),
14509  N->getOperand(3), DAG.getCondCode(ISD::SETNE));
14510  break;
14511  case Intrinsic::aarch64_sve_fcmpuo:
14512     return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14513  N->getValueType(0), N->getOperand(1), N->getOperand(2),
14514  N->getOperand(3), DAG.getCondCode(ISD::SETUO));
14515  break;
14516   case Intrinsic::aarch64_sve_fadda:
14517     return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
14518   case Intrinsic::aarch64_sve_faddv:
14519     return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
14520   case Intrinsic::aarch64_sve_fmaxnmv:
14521     return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
14522   case Intrinsic::aarch64_sve_fmaxv:
14523     return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
14524   case Intrinsic::aarch64_sve_fminnmv:
14525     return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
14526   case Intrinsic::aarch64_sve_fminv:
14527     return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
14528  case Intrinsic::aarch64_sve_sel:
14529  return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
14530  N->getOperand(1), N->getOperand(2), N->getOperand(3));
14531  case Intrinsic::aarch64_sve_cmpeq_wide:
14532  return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
14533  case Intrinsic::aarch64_sve_cmpne_wide:
14534  return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
14535  case Intrinsic::aarch64_sve_cmpge_wide:
14536  return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
14537  case Intrinsic::aarch64_sve_cmpgt_wide:
14538  return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
14539  case Intrinsic::aarch64_sve_cmplt_wide:
14540  return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
14541  case Intrinsic::aarch64_sve_cmple_wide:
14542  return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
14543  case Intrinsic::aarch64_sve_cmphs_wide:
14544  return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
14545  case Intrinsic::aarch64_sve_cmphi_wide:
14546  return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
14547  case Intrinsic::aarch64_sve_cmplo_wide:
14548  return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
14549  case Intrinsic::aarch64_sve_cmpls_wide:
14550  return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
14551   case Intrinsic::aarch64_sve_ptest_any:
14552     return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
14553                     AArch64CC::ANY_ACTIVE);
14554   case Intrinsic::aarch64_sve_ptest_first:
14555     return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
14556                     AArch64CC::FIRST_ACTIVE);
14557   case Intrinsic::aarch64_sve_ptest_last:
14558     return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
14559                     AArch64CC::LAST_ACTIVE);
14560   }
14561  return SDValue();
14562 }
14563 
14564 static SDValue performExtendCombine(SDNode *N,
14565                                     TargetLowering::DAGCombinerInfo &DCI,
14566                                     SelectionDAG &DAG) {
14567  // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
14568  // we can convert that DUP into another extract_high (of a bigger DUP), which
14569  // helps the backend to decide that an sabdl2 would be useful, saving a real
14570  // extract_high operation.
14571  if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
14572  (N->getOperand(0).getOpcode() == ISD::ABDU ||
14573  N->getOperand(0).getOpcode() == ISD::ABDS)) {
14574  SDNode *ABDNode = N->getOperand(0).getNode();
14575  SDValue NewABD =
14576         tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
14577  if (!NewABD.getNode())
14578  return SDValue();
14579 
14580  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
14581  }
14582  return SDValue();
14583 }
14584 
14585 static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
14586                                SDValue SplatVal, unsigned NumVecElts) {
14587  assert(!St.isTruncatingStore() && "cannot split truncating vector store");
14588  unsigned OrigAlignment = St.getAlignment();
14589  unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
14590 
14591  // Create scalar stores. This is at least as good as the code sequence for a
14592  // split unaligned store which is a dup.s, ext.b, and two stores.
14593  // Most of the time the three stores should be replaced by store pair
14594  // instructions (stp).
14595  SDLoc DL(&St);
14596  SDValue BasePtr = St.getBasePtr();
14597  uint64_t BaseOffset = 0;
14598 
14599  const MachinePointerInfo &PtrInfo = St.getPointerInfo();
14600  SDValue NewST1 =
14601  DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
14602  OrigAlignment, St.getMemOperand()->getFlags());
14603 
14604   // As this is in ISel, we will not merge this add, which may degrade results.
14605  if (BasePtr->getOpcode() == ISD::ADD &&
14606  isa<ConstantSDNode>(BasePtr->getOperand(1))) {
14607  BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
14608  BasePtr = BasePtr->getOperand(0);
14609  }
14610 
14611  unsigned Offset = EltOffset;
14612  while (--NumVecElts) {
14613  unsigned Alignment = MinAlign(OrigAlignment, Offset);
14614  SDValue OffsetPtr =
14615  DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
14616  DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
14617  NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
14618  PtrInfo.getWithOffset(Offset), Alignment,
14619  St.getMemOperand()->getFlags());
14620  Offset += EltOffset;
14621  }
14622  return NewST1;
14623 }
14624 
14625 // Returns an SVE type that ContentTy can be trivially sign or zero extended
14626 // into.
14627 static MVT getSVEContainerType(EVT ContentTy) {
14628  assert(ContentTy.isSimple() && "No SVE containers for extended types");
14629 
14630  switch (ContentTy.getSimpleVT().SimpleTy) {
14631  default:
14632  llvm_unreachable("No known SVE container for this MVT type");
14633  case MVT::nxv2i8:
14634  case MVT::nxv2i16:
14635  case MVT::nxv2i32:
14636  case MVT::nxv2i64:
14637  case MVT::nxv2f32:
14638  case MVT::nxv2f64:
14639  return MVT::nxv2i64;
14640  case MVT::nxv4i8:
14641  case MVT::nxv4i16:
14642  case MVT::nxv4i32:
14643  case MVT::nxv4f32:
14644  return MVT::nxv4i32;
14645  case MVT::nxv8i8:
14646  case MVT::nxv8i16:
14647  case MVT::nxv8f16:
14648  case MVT::nxv8bf16:
14649  return MVT::nxv8i16;
14650  case MVT::nxv16i8:
14651  return MVT::nxv16i8;
14652  }
14653 }
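// For example, getSVEContainerType(nxv2i16) is nxv2i64: the value can be
// sign- or zero-extended in place without changing the element count.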
14654 
14655 static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
14656  SDLoc DL(N);
14657  EVT VT = N->getValueType(0);
14658 
14659   if (VT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
14660  return SDValue();
14661 
14662  EVT ContainerVT = VT;
14663  if (ContainerVT.isInteger())
14664  ContainerVT = getSVEContainerType(ContainerVT);
14665 
14666  SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
14667  SDValue Ops[] = { N->getOperand(0), // Chain
14668  N->getOperand(2), // Pg
14669  N->getOperand(3), // Base
14670  DAG.getValueType(VT) };
14671 
14672  SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
14673  SDValue LoadChain = SDValue(Load.getNode(), 1);
14674 
14675  if (ContainerVT.isInteger() && (VT != ContainerVT))
14676  Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
14677 
14678  return DAG.getMergeValues({ Load, LoadChain }, DL);
14679 }
14680 
14681 static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
14682  SDLoc DL(N);
14683  EVT VT = N->getValueType(0);
14684  EVT PtrTy = N->getOperand(3).getValueType();
14685 
14686  if (VT == MVT::nxv8bf16 &&
14687  !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14688  return SDValue();
14689 
14690  EVT LoadVT = VT;
14691  if (VT.isFloatingPoint())
14692  LoadVT = VT.changeTypeToInteger();
14693 
14694  auto *MINode = cast<MemIntrinsicSDNode>(N);
14695  SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
14696  SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
14697  MINode->getOperand(3), DAG.getUNDEF(PtrTy),
14698  MINode->getOperand(2), PassThru,
14699  MINode->getMemoryVT(), MINode->getMemOperand(),
14700                                 ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
14701 
14702  if (VT.isFloatingPoint()) {
14703  SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
14704  return DAG.getMergeValues(Ops, DL);
14705  }
14706 
14707  return L;
14708 }
14709 
14710 template <unsigned Opcode>
14711 static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
14712  static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
14713  Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
14714  "Unsupported opcode.");
14715  SDLoc DL(N);
14716  EVT VT = N->getValueType(0);
14717  if (VT == MVT::nxv8bf16 &&
14718  !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14719  return SDValue();
14720 
14721  EVT LoadVT = VT;
14722  if (VT.isFloatingPoint())
14723  LoadVT = VT.changeTypeToInteger();
14724 
14725  SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
14726  SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
14727  SDValue LoadChain = SDValue(Load.getNode(), 1);
14728 
14729  if (VT.isFloatingPoint())
14730  Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));
14731 
14732  return DAG.getMergeValues({Load, LoadChain}, DL);
14733 }
14734 
14735 static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
14736  SDLoc DL(N);
14737  SDValue Data = N->getOperand(2);
14738  EVT DataVT = Data.getValueType();
14739  EVT HwSrcVt = getSVEContainerType(DataVT);
14740  SDValue InputVT = DAG.getValueType(DataVT);
14741 
14742  if (DataVT == MVT::nxv8bf16 &&
14743  !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14744  return SDValue();
14745 
14746  if (DataVT.isFloatingPoint())
14747  InputVT = DAG.getValueType(HwSrcVt);
14748 
14749  SDValue SrcNew;
14750  if (Data.getValueType().isFloatingPoint())
14751  SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data);
14752  else
14753  SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
14754 
14755  SDValue Ops[] = { N->getOperand(0), // Chain
14756  SrcNew,
14757  N->getOperand(4), // Base
14758  N->getOperand(3), // Pg
14759  InputVT
14760  };
14761 
14762  return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
14763 }
14764 
14765 static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
14766  SDLoc DL(N);
14767 
14768  SDValue Data = N->getOperand(2);
14769  EVT DataVT = Data.getValueType();
14770  EVT PtrTy = N->getOperand(4).getValueType();
14771 
14772  if (DataVT == MVT::nxv8bf16 &&
14773  !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14774  return SDValue();
14775 
14776  if (DataVT.isFloatingPoint())
14777  Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
14778 
14779  auto *MINode = cast<MemIntrinsicSDNode>(N);
14780  return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
14781  DAG.getUNDEF(PtrTy), MINode->getOperand(3),
14782  MINode->getMemoryVT(), MINode->getMemOperand(),
14783  ISD::UNINDEXED, false, false);
14784 }
14785 
14786 /// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
14787 /// load store optimizer pass will merge them to store pair stores. This should
14788 /// be better than a movi to create the vector zero followed by a vector store
14789 /// if the zero constant is not re-used, since one instruction and one register
14790 /// live range will be removed.
14791 ///
14792 /// For example, the final generated code should be:
14793 ///
14794 /// stp xzr, xzr, [x0]
14795 ///
14796 /// instead of:
14797 ///
14798 /// movi v0.2d, #0
14799 /// str q0, [x0]
14800 ///
14801 static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
14802  SDValue StVal = St.getValue();
14803  EVT VT = StVal.getValueType();
14804 
14805  // Avoid scalarizing zero splat stores for scalable vectors.
14806  if (VT.isScalableVector())
14807  return SDValue();
14808 
14809  // It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
14810  // 2, 3 or 4 i32 elements.
14811  int NumVecElts = VT.getVectorNumElements();
14812  if (!(((NumVecElts == 2 || NumVecElts == 3) &&
14813  VT.getVectorElementType().getSizeInBits() == 64) ||
14814  ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
14815  VT.getVectorElementType().getSizeInBits() == 32)))
14816  return SDValue();
14817 
14818  if (StVal.getOpcode() != ISD::BUILD_VECTOR)
14819  return SDValue();
14820 
14821  // If the zero constant has more than one use then the vector store could be
14822  // better since the constant mov will be amortized and stp q instructions
14823  // should be able to be formed.
14824  if (!StVal.hasOneUse())
14825  return SDValue();
14826 
14827  // If the store is truncating then it's going down to i16 or smaller, which
14828  // means it can be implemented in a single store anyway.
14829  if (St.isTruncatingStore())
14830  return SDValue();
14831 
14832  // If the immediate offset of the address operand is too large for the stp
14833  // instruction, then bail out.
14834  if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
14835  int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
14836  if (Offset < -512 || Offset > 504)
14837  return SDValue();
14838  }
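  // (These bounds come from stp's scaled signed 7-bit immediate: for 64-bit
  // registers it covers multiples of 8 in [-512, 504].)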
14839 
14840  for (int I = 0; I < NumVecElts; ++I) {
14841  SDValue EltVal = StVal.getOperand(I);
14842  if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
14843  return SDValue();
14844  }
14845 
14846  // Use a CopyFromReg WZR/XZR here to prevent
14847  // DAGCombiner::MergeConsecutiveStores from undoing this transformation.
14848  SDLoc DL(&St);
14849  unsigned ZeroReg;
14850  EVT ZeroVT;
14851  if (VT.getVectorElementType().getSizeInBits() == 32) {
14852  ZeroReg = AArch64::WZR;
14853  ZeroVT = MVT::i32;
14854  } else {
14855  ZeroReg = AArch64::XZR;
14856  ZeroVT = MVT::i64;
14857  }
14858  SDValue SplatVal =
14859  DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
14860  return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
14861 }
14862 
14863 /// Replace a splat of a scalar to a vector store by scalar stores of the scalar
14864 /// value. The load store optimizer pass will merge them to store pair stores.
14865 /// This has better performance than a splat of the scalar followed by a split
14866 /// vector store. Even if the stores are not merged it is four stores vs a dup,
14867 /// followed by an ext.b and two stores.
14868 static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
14869  SDValue StVal = St.getValue();
14870  EVT VT = StVal.getValueType();
14871 
14872  // Don't replace floating point stores, they possibly won't be transformed to
14873  // stp because of the store pair suppress pass.
14874  if (VT.isFloatingPoint())
14875  return SDValue();
14876 
14877  // We can express a splat as store pair(s) for 2 or 4 elements.
14878  unsigned NumVecElts = VT.getVectorNumElements();
14879  if (NumVecElts != 4 && NumVecElts != 2)
14880  return SDValue();
14881 
14882  // If the store is truncating then it's going down to i16 or smaller, which
14883  // means it can be implemented in a single store anyway.
14884  if (St.isTruncatingStore())
14885  return SDValue();
14886 
14887  // Check that this is a splat.
14888  // Make sure that each of the relevant vector element locations are inserted
14889  // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
14890  std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
14891  SDValue SplatVal;
14892  for (unsigned I = 0; I < NumVecElts; ++I) {
14893  // Check for insert vector elements.
14894  if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
14895  return SDValue();
14896 
14897  // Check that same value is inserted at each vector element.
14898  if (I == 0)
14899  SplatVal = StVal.getOperand(1);
14900  else if (StVal.getOperand(1) != SplatVal)
14901  return SDValue();
14902 
14903  // Check insert element index.
14904  ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
14905  if (!CIndex)
14906  return SDValue();
14907  uint64_t IndexVal = CIndex->getZExtValue();
14908  if (IndexVal >= NumVecElts)
14909  return SDValue();
14910  IndexNotInserted.reset(IndexVal);
14911 
14912  StVal = StVal.getOperand(0);
14913  }
14914  // Check that all vector element locations were inserted to.
14915  if (IndexNotInserted.any())
14916  return SDValue();
14917 
14918  return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
14919 }
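// For example (illustrative), a v4i32 splat of w1 stored to [x0] is expected
// to become two pair stores:
//   stp w1, w1, [x0]
//   stp w1, w1, [x0, #8]
// rather than dup v0.4s, w1 followed by str q0, [x0].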
14920 
14921 static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
14922                            SelectionDAG &DAG,
14923  const AArch64Subtarget *Subtarget) {
14924 
14925  StoreSDNode *S = cast<StoreSDNode>(N);
14926  if (S->isVolatile() || S->isIndexed())
14927  return SDValue();
14928 
14929  SDValue StVal = S->getValue();
14930  EVT VT = StVal.getValueType();
14931 
14932  if (!VT.isFixedLengthVector())
14933  return SDValue();
14934 
14935  // If we get a splat of zeros, convert this vector store to a store of
14936  // scalars. They will be merged into store pairs of xzr thereby removing one
14937  // instruction and one register.
14938  if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
14939  return ReplacedZeroSplat;
14940 
14941  // FIXME: The logic for deciding if an unaligned store should be split should
14942  // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
14943  // a call to that function here.
14944 
14945  if (!Subtarget->isMisaligned128StoreSlow())
14946  return SDValue();
14947 
14948  // Don't split at -Oz.
14949   if (DAG.getMachineFunction().getFunction().hasMinSize())
14950  return SDValue();
14951 
14952  // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
14953  // those up regresses performance on micro-benchmarks and olden/bh.
14954  if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
14955  return SDValue();
14956 
14957  // Split unaligned 16B stores. They are terrible for performance.
14958  // Don't split stores with alignment of 1 or 2. Code that uses clang vector
14959  // extensions can use this to mark that it does not want splitting to happen
14960  // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
14961  // eliminating alignment hazards is only 1 in 8 for alignment of 2.
14962  if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
14963  S->getAlignment() <= 2)
14964  return SDValue();
14965 
14966  // If we get a splat of a scalar convert this vector store to a store of
14967  // scalars. They will be merged into store pairs thereby removing two
14968  // instructions.
14969  if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
14970  return ReplacedSplat;
14971 
14972  SDLoc DL(S);
14973 
14974  // Split VT into two.
14975  EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
14976  unsigned NumElts = HalfVT.getVectorNumElements();
14977  SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
14978  DAG.getConstant(0, DL, MVT::i64));
14979  SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
14980  DAG.getConstant(NumElts, DL, MVT::i64));
14981  SDValue BasePtr = S->getBasePtr();
14982  SDValue NewST1 =
14983  DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
14984  S->getAlignment(), S->getMemOperand()->getFlags());
14985  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
14986  DAG.getConstant(8, DL, MVT::i64));
14987  return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
14988  S->getPointerInfo(), S->getAlignment(),
14989  S->getMemOperand()->getFlags());
14990 }
14991 
14993  assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexepected Opcode!");
14994 
14995  // splice(pg, op1, undef) -> op1
14996  if (N->getOperand(2).isUndef())
14997  return N->getOperand(1);
14998 
14999  return SDValue();
15000 }
15001 
15002 static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
15003  SDLoc DL(N);
15004  SDValue Op0 = N->getOperand(0);
15005  SDValue Op1 = N->getOperand(1);
15006  EVT ResVT = N->getValueType(0);
15007 
15008  // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
15009  if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
15010  if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
15011  SDValue X = Op0.getOperand(0).getOperand(0);
15012  return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
15013  }
15014  }
15015 
15016  // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
15017  if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
15018  if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
15019  SDValue Z = Op1.getOperand(0).getOperand(1);
15020  return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
15021  }
15022  }
15023 
15024  return SDValue();
15025 }
15026 
15027 static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG) {
15028  unsigned Opc = N->getOpcode();
15029 
15030   assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads
15031            Opc <= AArch64ISD::GLD1_IMM_MERGE_ZERO) ||
15032           (Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads
15033            Opc <= AArch64ISD::GLD1S_IMM_MERGE_ZERO)) &&
15034          "Invalid opcode.");
15035 
15036   const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
15037                       Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
15038   const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
15039                       Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
15040   const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
15041                         Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
15042                         Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
15043                         Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;
15044 
15045  SDLoc DL(N);
15046  SDValue Chain = N->getOperand(0);
15047  SDValue Pg = N->getOperand(1);
15048  SDValue Base = N->getOperand(2);
15049  SDValue Offset = N->getOperand(3);
15050  SDValue Ty = N->getOperand(4);
15051 
15052  EVT ResVT = N->getValueType(0);
15053 
15054  const auto OffsetOpc = Offset.getOpcode();
15055   const bool OffsetIsZExt =
15056       OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
15057   const bool OffsetIsSExt =
15058       OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;
15059 
15060  // Fold sign/zero extensions of vector offsets into GLD1 nodes where possible.
15061  if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
15062  SDValue ExtPg = Offset.getOperand(0);
15063  VTSDNode *ExtFrom = cast<VTSDNode>(Offset.getOperand(2).getNode());
15064  EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
15065 
15066  // If the predicate for the sign- or zero-extended offset is the
15067  // same as the predicate used for this load and the sign-/zero-extension
15068  // was from a 32-bit value...
15069  if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
15070  SDValue UnextendedOffset = Offset.getOperand(1);
15071 
15072  unsigned NewOpc = getGatherVecOpcode(Scaled, OffsetIsSExt, true);
15073  if (Signed)
15074  NewOpc = getSignExtendedGatherOpcode(NewOpc);
15075 
15076  return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
15077  {Chain, Pg, Base, UnextendedOffset, Ty});
15078  }
15079  }
15080 
15081  return SDValue();
15082 }
15083 
15084 /// Optimize a vector shift instruction and its operand if shifted out
15085 /// bits are not used.
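/// For example (illustrative): in (VLSHR X, #8) on v8i16 lanes, the low 8
/// bits of each lane of X are shifted out, so SimplifyDemandedBits may strip
/// a preceding operation on X that only affects those low bits.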
15086 static SDValue performVectorShiftCombine(SDNode *N,
15087  const AArch64TargetLowering &TLI,
15088  TargetLowering::DAGCombinerInfo &DCI) {
15089  assert(N->getOpcode() == AArch64ISD::VASHR ||
15090  N->getOpcode() == AArch64ISD::VLSHR);
15091 
15092  SDValue Op = N->getOperand(0);
15093  unsigned OpScalarSize = Op.getScalarValueSizeInBits();
15094 
15095  unsigned ShiftImm = N->getConstantOperandVal(1);
15096  assert(OpScalarSize > ShiftImm && "Invalid shift imm");
15097 
15098  APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
15099  APInt DemandedMask = ~ShiftedOutBits;
15100 
15101  if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15102  return SDValue(N, 0);
15103 
15104  return SDValue();
15105 }
15106 
15107 /// Target-specific DAG combine function for post-increment LD1 (lane) and
15108 /// post-increment LD1R.
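/// For example (illustrative): a scalar load from [x0] inserted into lane 1
/// of a v4i32, with x0 separately incremented by 4, can become the single
/// post-indexed instruction "ld1 { v0.s }[1], [x0], #4".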
15109 static SDValue performPostLD1Combine(SDNode *N,
15110  TargetLowering::DAGCombinerInfo &DCI,
15111  bool IsLaneOp) {
15112  if (DCI.isBeforeLegalizeOps())
15113  return SDValue();
15114 
15115  SelectionDAG &DAG = DCI.DAG;
15116  EVT VT = N->getValueType(0);
15117 
15118  if (VT.isScalableVector())
15119  return SDValue();
15120 
15121  unsigned LoadIdx = IsLaneOp ? 1 : 0;
15122  SDNode *LD = N->getOperand(LoadIdx).getNode();
15123  // If it is not a LOAD, we cannot do this combine.
15124  if (LD->getOpcode() != ISD::LOAD)
15125  return SDValue();
15126 
15127  // The vector lane must be a constant in the LD1LANE opcode.
15128  SDValue Lane;
15129  if (IsLaneOp) {
15130  Lane = N->getOperand(2);
15131  auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
15132  if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
15133  return SDValue();
15134  }
15135 
15136  LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
15137  EVT MemVT = LoadSDN->getMemoryVT();
15138  // Check if memory operand is the same type as the vector element.
15139  if (MemVT != VT.getVectorElementType())
15140  return SDValue();
15141 
15142  // Check if there are other uses. If so, do not combine as it will introduce
15143  // an extra load.
15144  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
15145  ++UI) {
15146  if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
15147  continue;
15148  if (*UI != N)
15149  return SDValue();
15150  }
15151 
15152  SDValue Addr = LD->getOperand(1);
15153  SDValue Vector = N->getOperand(0);
15154  // Search for a use of the address operand that is an increment.
15155  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
15156  Addr.getNode()->use_end(); UI != UE; ++UI) {
15157  SDNode *User = *UI;
15158  if (User->getOpcode() != ISD::ADD
15159  || UI.getUse().getResNo() != Addr.getResNo())
15160  continue;
15161 
15162  // If the increment is a constant, it must match the memory ref size.
15163  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
15164  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
15165  uint32_t IncVal = CInc->getZExtValue();
15166  unsigned NumBytes = VT.getScalarSizeInBits() / 8;
15167  if (IncVal != NumBytes)
15168  continue;
15169  Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
15170  }
15171 
15172  // To avoid cycle construction, make sure that neither the load nor the add
15173  // is a predecessor of the other or of the Vector.
15174  SmallPtrSet<const SDNode *, 32> Visited;
15175  SmallVector<const SDNode *, 16> Worklist;
15176  Visited.insert(Addr.getNode());
15177  Worklist.push_back(User);
15178  Worklist.push_back(LD);
15179  Worklist.push_back(Vector.getNode());
15180  if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
15181  SDNode::hasPredecessorHelper(User, Visited, Worklist))
15182  continue;
15183 
15184  SmallVector<SDValue, 8> Ops;
15185  Ops.push_back(LD->getOperand(0)); // Chain
15186  if (IsLaneOp) {
15187  Ops.push_back(Vector); // The vector to be inserted
15188  Ops.push_back(Lane); // The lane to be inserted in the vector
15189  }
15190  Ops.push_back(Addr);
15191  Ops.push_back(Inc);
15192 
15193  EVT Tys[3] = { VT, MVT::i64, MVT::Other };
15194  SDVTList SDTys = DAG.getVTList(Tys);
15195  unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
15196  SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
15197  MemVT,
15198  LoadSDN->getMemOperand());
15199 
15200  // Update the uses.
15201  SDValue NewResults[] = {
15202  SDValue(LD, 0), // The result of load
15203  SDValue(UpdN.getNode(), 2) // Chain
15204  };
15205  DCI.CombineTo(LD, NewResults);
15206  DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
15207  DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
15208 
15209  break;
15210  }
15211  return SDValue();
15212 }
15213 
15214 /// Simplify ``Addr`` given that the top byte of it is ignored by HW during
15215 /// address translation.
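/// For example (illustrative): under TBI, bits 63:56 of ``Addr`` do not
/// participate in translation, so a mask that only clears that top byte can
/// be removed and the tagged pointer used directly.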
15216 static bool performTBISimplification(SDValue Addr,
15217  TargetLowering::DAGCombinerInfo &DCI,
15218  SelectionDAG &DAG) {
15219  APInt DemandedMask = APInt::getLowBitsSet(64, 56);
15220  KnownBits Known;
15221  TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
15222  !DCI.isBeforeLegalizeOps());
15223  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15224  if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
15225  DCI.CommitTargetLoweringOpt(TLO);
15226  return true;
15227  }
15228  return false;
15229 }
15230 
15231 static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
15232  assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
15233  "Expected STORE dag node in input!");
15234 
15235  if (auto Store = dyn_cast<StoreSDNode>(N)) {
15236  if (!Store->isTruncatingStore() || Store->isIndexed())
15237  return SDValue();
15238  SDValue Ext = Store->getValue();
15239  auto ExtOpCode = Ext.getOpcode();
15240  if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
15241  ExtOpCode != ISD::ANY_EXTEND)
15242  return SDValue();
15243  SDValue Orig = Ext->getOperand(0);
15244  if (Store->getMemoryVT() != Orig->getValueType(0))
15245  return SDValue();
15246  return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
15247  Store->getBasePtr(), Store->getPointerInfo(),
15248  Store->getAlign());
15249  }
15250 
15251  return SDValue();
15252 }
15253 
15254 static SDValue performSTORECombine(SDNode *N,
15255  TargetLowering::DAGCombinerInfo &DCI,
15256  SelectionDAG &DAG,
15257  const AArch64Subtarget *Subtarget) {
15258  if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
15259  return Split;
15260 
15261  if (Subtarget->supportsAddressTopByteIgnored() &&
15262  performTBISimplification(N->getOperand(2), DCI, DAG))
15263  return SDValue(N, 0);
15264 
15265  if (SDValue Store = foldTruncStoreOfExt(DAG, N))
15266  return Store;
15267 
15268  return SDValue();
15269 }
15270 
15271 /// Target-specific DAG combine function for NEON load/store intrinsics
15272 /// to merge base address updates.
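/// For example (illustrative): (ld2 {v0.4s, v1.4s}, [x0]) followed by
/// "x0 = x0 + #32" is merged into the post-indexed form
/// "ld2 {v0.4s, v1.4s}, [x0], #32", with users of the add rewritten to use
/// the write-back result.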
15273 static SDValue performNEONPostLDSTCombine(SDNode *N,
15274  TargetLowering::DAGCombinerInfo &DCI,
15275  SelectionDAG &DAG) {
15276  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
15277  return SDValue();
15278 
15279  unsigned AddrOpIdx = N->getNumOperands() - 1;
15280  SDValue Addr = N->getOperand(AddrOpIdx);
15281 
15282  // Search for a use of the address operand that is an increment.
15283  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
15284  UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
15285  SDNode *User = *UI;
15286  if (User->getOpcode() != ISD::ADD ||
15287  UI.getUse().getResNo() != Addr.getResNo())
15288  continue;
15289 
15290  // Check that the add is independent of the load/store. Otherwise, folding
15291  // it would create a cycle.
15292  SmallPtrSet<const SDNode *, 32> Visited;
15293  SmallVector<const SDNode *, 16> Worklist;
15294  Visited.insert(Addr.getNode());
15295  Worklist.push_back(N);
15296  Worklist.push_back(User);
15297  if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
15298  SDNode::hasPredecessorHelper(User, Visited, Worklist))
15299  continue;
15300 
15301  // Find the new opcode for the updating load/store.
15302  bool IsStore = false;
15303  bool IsLaneOp = false;
15304  bool IsDupOp = false;
15305  unsigned NewOpc = 0;
15306  unsigned NumVecs = 0;
15307  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
15308  switch (IntNo) {
15309  default: llvm_unreachable("unexpected intrinsic for Neon base update");
15310  case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
15311  NumVecs = 2; break;
15312  case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
15313  NumVecs = 3; break;
15314  case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
15315  NumVecs = 4; break;
15316  case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
15317  NumVecs = 2; IsStore = true; break;
15318  case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
15319  NumVecs = 3; IsStore = true; break;
15320  case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
15321  NumVecs = 4; IsStore = true; break;
15322  case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
15323  NumVecs = 2; break;
15324  case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
15325  NumVecs = 3; break;
15326  case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
15327  NumVecs = 4; break;
15328  case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
15329  NumVecs = 2; IsStore = true; break;
15330  case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
15331  NumVecs = 3; IsStore = true; break;
15332  case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
15333  NumVecs = 4; IsStore = true; break;
15334  case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
15335  NumVecs = 2; IsDupOp = true; break;
15336  case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
15337  NumVecs = 3; IsDupOp = true; break;
15338  case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
15339  NumVecs = 4; IsDupOp = true; break;
15340  case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
15341  NumVecs = 2; IsLaneOp = true; break;
15342  case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
15343  NumVecs = 3; IsLaneOp = true; break;
15344  case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
15345  NumVecs = 4; IsLaneOp = true; break;
15346  case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
15347  NumVecs = 2; IsStore = true; IsLaneOp = true; break;
15348  case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
15349  NumVecs = 3; IsStore = true; IsLaneOp = true; break;
15350  case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
15351  NumVecs = 4; IsStore = true; IsLaneOp = true; break;
15352  }
15353 
15354  EVT VecTy;
15355  if (IsStore)
15356  VecTy = N->getOperand(2).getValueType();
15357  else
15358  VecTy = N->getValueType(0);
15359 
15360  // If the increment is a constant, it must match the memory ref size.
15361  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
15362  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
15363  uint32_t IncVal = CInc->getZExtValue();
15364  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15365  if (IsLaneOp || IsDupOp)
15366  NumBytes /= VecTy.getVectorNumElements();
15367  if (IncVal != NumBytes)
15368  continue;
15369  Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
15370  }
15371  SmallVector<SDValue, 8> Ops;
15372  Ops.push_back(N->getOperand(0)); // Incoming chain
15373  // Load-lane and store operations take a vector list as input.
15374  if (IsLaneOp || IsStore)
15375  for (unsigned i = 2; i < AddrOpIdx; ++i)
15376  Ops.push_back(N->getOperand(i));
15377  Ops.push_back(Addr); // Base register
15378  Ops.push_back(Inc);
15379 
15380  // Return Types.
15381  EVT Tys[6];
15382  unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
15383  unsigned n;
15384  for (n = 0; n < NumResultVecs; ++n)
15385  Tys[n] = VecTy;
15386  Tys[n++] = MVT::i64; // Type of write back register
15387  Tys[n] = MVT::Other; // Type of the chain
15388  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
15389 
15390  MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
15391  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
15392  MemInt->getMemoryVT(),
15393  MemInt->getMemOperand());
15394 
15395  // Update the uses.
15396  std::vector<SDValue> NewResults;
15397  for (unsigned i = 0; i < NumResultVecs; ++i) {
15398  NewResults.push_back(SDValue(UpdN.getNode(), i));
15399  }
15400  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
15401  DCI.CombineTo(N, NewResults);
15402  DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
15403 
15404  break;
15405  }
15406  return SDValue();
15407 }
15408 
15409 // Checks to see if the value is the prescribed width and returns information
15410 // about its extension mode.
15411 static
15412 bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
15413  ExtType = ISD::NON_EXTLOAD;
15414  switch(V.getNode()->getOpcode()) {
15415  default:
15416  return false;
15417  case ISD::LOAD: {
15418  LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
15419  if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
15420  || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
15421  ExtType = LoadNode->getExtensionType();
15422  return true;
15423  }
15424  return false;
15425  }
15426  case ISD::AssertSext: {
15427  VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
15428  if ((TypeNode->getVT() == MVT::i8 && width == 8)
15429  || (TypeNode->getVT() == MVT::i16 && width == 16)) {
15430  ExtType = ISD::SEXTLOAD;
15431  return true;
15432  }
15433  return false;
15434  }
15435  case ISD::AssertZext: {
15436  VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
15437  if ((TypeNode->getVT() == MVT::i8 && width == 8)
15438  || (TypeNode->getVT() == MVT::i16 && width == 16)) {
15439  ExtType = ISD::ZEXTLOAD;
15440  return true;
15441  }
15442  return false;
15443  }
15444  case ISD::Constant:
15445  case ISD::TargetConstant: {
15446  return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
15447  1LL << (width - 1);
15448  }
15449  }
15450 
15451  return true;
15452 }
15453 
15454 // This function does a whole lot of voodoo to determine if the tests are
15455 // equivalent without and with a mask. Essentially what happens is that given a
15456 // DAG resembling:
15457 //
15458 // +-------------+ +-------------+ +-------------+ +-------------+
15459 // | Input | | AddConstant | | CompConstant| | CC |
15460 // +-------------+ +-------------+ +-------------+ +-------------+
15461 // | | | |
15462 // V V | +----------+
15463 // +-------------+ +----+ | |
15464 // | ADD | |0xff| | |
15465 // +-------------+ +----+ | |
15466 // | | | |
15467 // V V | |
15468 // +-------------+ | |
15469 // | AND | | |
15470 // +-------------+ | |
15471 // | | |
15472 // +-----+ | |
15473 // | | |
15474 // V V V
15475 // +-------------+
15476 // | CMP |
15477 // +-------------+
15478 //
15479 // The AND node may be safely removed for some combinations of inputs. In
15480 // particular we need to take into account the extension type of the Input,
15481 // the exact values of AddConstant, CompConstant, and CC, along with the nominal
15482 // width of the input (this can work for any width inputs, the above graph is
15483 // specific to 8 bits.)
15484 //
15485 // The specific equations were worked out by generating output tables for each
15486 // AArch64CC value in terms of the AddConstant (w1) and CompConstant (w2). The
15487 // problem was simplified by working with 4 bit inputs, which means we only
15488 // needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
15489 // extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
15490 // patterns present in both extensions (0,7). For every distinct set of
15491 // AddConstant and CompConstant bit patterns we can consider the masked and
15492 // unmasked versions to be equivalent if the result of this function is true for
15493 // all 16 distinct bit patterns of the current extension type of Input (w0).
15494 //
15495 // sub w8, w0, w1
15496 // and w10, w8, #0x0f
15497 // cmp w8, w2
15498 // cset w9, AArch64CC
15499 // cmp w10, w2
15500 // cset w11, AArch64CC
15501 // cmp w9, w11
15502 // cset w0, eq
15503 // ret
15504 //
15505 // Since the above function shows when the outputs are equivalent, it defines
15506 // when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
15507 // would be expensive to run during compiles. The equations below were written
15508 // in a test harness that confirmed they gave equivalent outputs to the above
15509 // function for all inputs, so they can be used to determine if the removal is
15510 // legal instead.
15511 //
15512 // isEquivalentMaskless() is the code for testing if the AND can be removed,
15513 // factored out of the DAG recognition because the DAG can take several forms.
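//
// Worked example (illustrative): for a zero-extended 8-bit Input with
// AddConstant == 0, the subtraction leaves a value that still fits in 8 bits,
// so the AND with 0xff is a no-op and both compares see identical operands;
// this is why several cases below treat AddConstant == 0 as trivially safe.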
15514 
15515 static bool isEquivalentMaskless(unsigned CC, unsigned width,
15516  ISD::LoadExtType ExtType, int AddConstant,
15517  int CompConstant) {
15518  // By being careful about our equations and only writing them in terms of
15519  // symbolic values and well-known constants (0, 1, -1, MaxUInt), we can
15520  // make them generally applicable to all bit widths.
15521  int MaxUInt = (1 << width);
15522 
15523  // For the purposes of these comparisons sign extending the type is
15524  // equivalent to zero extending the add and displacing it by half the integer
15525  // width. Provided we are careful and make sure our equations are valid over
15526  // the whole range we can just adjust the input and avoid writing equations
15527  // for sign extended inputs.
15528  if (ExtType == ISD::SEXTLOAD)
15529  AddConstant -= (1 << (width-1));
15530 
15531  switch(CC) {
15532  case AArch64CC::LE:
15533  case AArch64CC::GT:
15534  if ((AddConstant == 0) ||
15535  (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
15536  (AddConstant >= 0 && CompConstant < 0) ||
15537  (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
15538  return true;
15539  break;
15540  case AArch64CC::LT:
15541  case AArch64CC::GE:
15542  if ((AddConstant == 0) ||
15543  (AddConstant >= 0 && CompConstant <= 0) ||
15544  (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
15545  return true;
15546  break;
15547  case AArch64CC::HI:
15548  case AArch64CC::LS:
15549  if ((AddConstant >= 0 && CompConstant < 0) ||
15550  (AddConstant <= 0 && CompConstant >= -1 &&
15551  CompConstant < AddConstant + MaxUInt))
15552  return true;
15553  break;
15554  case AArch64CC::PL:
15555  case AArch64CC::MI:
15556  if ((AddConstant == 0) ||
15557  (AddConstant > 0 && CompConstant <= 0) ||
15558  (AddConstant < 0 && CompConstant <= AddConstant))
15559  return true;
15560  break;
15561  case AArch64CC::LO:
15562  case AArch64CC::HS:
15563  if ((AddConstant >= 0 && CompConstant <= 0) ||
15564  (AddConstant <= 0 && CompConstant >= 0 &&
15565  CompConstant <= AddConstant + MaxUInt))
15566  return true;
15567  break;
15568  case AArch64CC::EQ:
15569  case AArch64CC::NE:
15570  if ((AddConstant > 0 && CompConstant < 0) ||
15571  (AddConstant < 0 && CompConstant >= 0 &&
15572  CompConstant < AddConstant + MaxUInt) ||
15573  (AddConstant >= 0 && CompConstant >= 0 &&
15574  CompConstant >= AddConstant) ||
15575  (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
15576  return true;
15577  break;
15578  case AArch64CC::VS:
15579  case AArch64CC::VC:
15580  case AArch64CC::AL:
15581  case AArch64CC::NV:
15582  return true;
15583  case AArch64CC::Invalid:
15584  break;
15585  }
15586 
15587  return false;
15588 }
15589 
15590 static
15591 SDValue performCONDCombine(SDNode *N,
15592  TargetLowering::DAGCombinerInfo &DCI,
15593  SelectionDAG &DAG, unsigned CCIndex,
15594  unsigned CmpIndex) {
15595  unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
15596  SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
15597  unsigned CondOpcode = SubsNode->getOpcode();
15598 
15599  if (CondOpcode != AArch64ISD::SUBS)
15600  return SDValue();
15601 
15602  // There is a SUBS feeding this condition. Is it fed by a mask we can
15603  // use?
15604 
15605  SDNode *AndNode = SubsNode->getOperand(0).getNode();
15606  unsigned MaskBits = 0;
15607 
15608  if (AndNode->getOpcode() != ISD::AND)
15609  return SDValue();
15610 
15611  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
15612  uint32_t CNV = CN->getZExtValue();
15613  if (CNV == 255)
15614  MaskBits = 8;
15615  else if (CNV == 65535)
15616  MaskBits = 16;
15617  }
15618 
15619  if (!MaskBits)
15620  return SDValue();
15621 
15622  SDValue AddValue = AndNode->getOperand(0);
15623 
15624  if (AddValue.getOpcode() != ISD::ADD)
15625  return SDValue();
15626 
15627  // The basic dag structure is correct, grab the inputs and validate them.
15628 
15629  SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
15630  SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
15631  SDValue SubsInputValue = SubsNode->getOperand(1);
15632 
15633  // The mask is present and the provenance of all the values is a smaller type,
15634  // let's see if the mask is superfluous.
15635 
15636  if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
15637  !isa<ConstantSDNode>(SubsInputValue.getNode()))
15638  return SDValue();
15639 
15640  ISD::LoadExtType ExtType;
15641 
15642  if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
15643  !checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
15644  !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
15645  return SDValue();
15646 
15647  if(!isEquivalentMaskless(CC, MaskBits, ExtType,
15648  cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
15649  cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
15650  return SDValue();
15651 
15652  // The AND is not necessary, remove it.
15653 
15654  SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
15655  SubsNode->getValueType(1));
15656  SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
15657 
15658  SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
15659  DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
15660 
15661  return SDValue(N, 0);
15662 }
15663 
15664 // Optimize compare with zero and branch.
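// For example (illustrative): a BRCOND on EQ whose flags come from
// (SUBS x, #0) becomes (CBZ x, dest), and the NE form becomes (CBNZ x, dest),
// removing the explicit compare when only its flags were used.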
15665 static SDValue performBRCONDCombine(SDNode *N,
15666  TargetLowering::DAGCombinerInfo &DCI,
15667  SelectionDAG &DAG) {
15668  MachineFunction &MF = DAG.getMachineFunction();
15669  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
15670  // will not be produced, as they are conditional branch instructions that do
15671  // not set flags.
15672  if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
15673  return SDValue();
15674 
15675  if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
15676  N = NV.getNode();
15677  SDValue Chain = N->getOperand(0);
15678  SDValue Dest = N->getOperand(1);
15679  SDValue CCVal = N->getOperand(2);
15680  SDValue Cmp = N->getOperand(3);
15681 
15682  assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
15683  unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
15684  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
15685  return SDValue();
15686 
15687  unsigned CmpOpc = Cmp.getOpcode();
15688  if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
15689  return SDValue();
15690 
15691  // Only attempt folding if there is only one use of the flag and no use of the
15692  // value.
15693  if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
15694  return SDValue();
15695 
15696  SDValue LHS = Cmp.getOperand(0);
15697  SDValue RHS = Cmp.getOperand(1);
15698 
15699  assert(LHS.getValueType() == RHS.getValueType() &&
15700  "Expected the value type to be the same for both operands!");
15701  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
15702  return SDValue();
15703 
15704  if (isNullConstant(LHS))
15705  std::swap(LHS, RHS);
15706 
15707  if (!isNullConstant(RHS))
15708  return SDValue();
15709 
15710  if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
15711  LHS.getOpcode() == ISD::SRL)
15712  return SDValue();
15713 
15714  // Fold the compare into the branch instruction.
15715  SDValue BR;
15716  if (CC == AArch64CC::EQ)
15717  BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
15718  else
15719  BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
15720 
15721  // Do not add new nodes to DAG combiner worklist.
15722  DCI.CombineTo(N, BR, false);
15723 
15724  return SDValue();
15725 }
15726 
15727 // Optimize CSEL instructions
15728 static SDValue performCSELCombine(SDNode *N,
15729  TargetLowering::DAGCombinerInfo &DCI,
15730  SelectionDAG &DAG) {
15731  // CSEL x, x, cc -> x
15732  if (N->getOperand(0) == N->getOperand(1))
15733  return N->getOperand(0);
15734 
15735  return performCONDCombine(N, DCI, DAG, 2, 3);
15736 }
15737 
15738 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
15739  assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
15740  SDValue LHS = N->getOperand(0);
15741  SDValue RHS = N->getOperand(1);
15742  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
15743 
15744  // setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
15745  if (Cond == ISD::SETNE && isOneConstant(RHS) &&
15746  LHS->getOpcode() == AArch64ISD::CSEL &&
15747  isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
15748  LHS->hasOneUse()) {
15749  SDLoc DL(N);
15750 
15751  // Invert CSEL's condition.
15752  auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
15753  auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
15754  auto NewCond = getInvertedCondCode(OldCond);
15755 
15756  // csel 0, 1, !cond, X
15757  SDValue CSEL =
15758  DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
15759  LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
15760  LHS.getOperand(3));
15761  return DAG.getZExtOrTrunc(CSEL, DL, N->getValueType(0));
15762  }
15763 
15764  return SDValue();
15765 }
15766 
15767 static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) {
15768  assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
15769  "Unexpected opcode!");
15770 
15771  SDValue Pred = N->getOperand(0);
15772  SDValue LHS = N->getOperand(1);
15773  SDValue RHS = N->getOperand(2);
15774  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
15775 
15776  // setcc_merge_zero pred (sign_extend (setcc_merge_zero ... pred ...)), 0, ne
15777  // => inner setcc_merge_zero
15778  if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
15779  LHS->getOpcode() == ISD::SIGN_EXTEND &&
15780  LHS->getOperand(0)->getValueType(0) == N->getValueType(0) &&
15781  LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
15782  LHS->getOperand(0)->getOperand(0) == Pred)
15783  return LHS->getOperand(0);
15784 
15785  return SDValue();
15786 }
15787 
15788 // Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
15789 // as well as whether the test should be inverted. This code is required to
15790 // catch these cases (as opposed to standard dag combines) because
15791 // AArch64ISD::TBZ is matched during legalization.
15792 static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
15793  SelectionDAG &DAG) {
15794 
15795  if (!Op->hasOneUse())
15796  return Op;
15797 
15798  // We don't handle undef/constant-fold cases below, as they should have
15799  // already been taken care of (e.g. and of 0, test of undefined shifted bits,
15800  // etc.)
15801 
15802  // (tbz (trunc x), b) -> (tbz x, b)
15803  // This case is just here to enable more of the below cases to be caught.
15804  if (Op->getOpcode() == ISD::TRUNCATE &&
15805  Bit < Op->getValueType(0).getSizeInBits()) {
15806  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
15807  }
15808 
15809  // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
15810  if (Op->getOpcode() == ISD::ANY_EXTEND &&
15811  Bit < Op->getOperand(0).getValueSizeInBits()) {
15812  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
15813  }
15814 
15815  if (Op->getNumOperands() != 2)
15816  return Op;
15817 
15818  auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
15819  if (!C)
15820  return Op;
15821 
15822  switch (Op->getOpcode()) {
15823  default:
15824  return Op;
15825 
15826  // (tbz (and x, m), b) -> (tbz x, b)
15827  case ISD::AND:
15828  if ((C->getZExtValue() >> Bit) & 1)
15829  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
15830  return Op;
15831 
15832  // (tbz (shl x, c), b) -> (tbz x, b-c)
15833  case ISD::SHL:
15834  if (C->getZExtValue() <= Bit &&
15835  (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
15836  Bit = Bit - C->getZExtValue();
15837  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
15838  }
15839  return Op;
15840 
15841  // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
15842  case ISD::SRA:
15843  Bit = Bit + C->getZExtValue();
15844  if (Bit >= Op->getValueType(0).getSizeInBits())
15845  Bit = Op->getValueType(0).getSizeInBits() - 1;
15846  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
15847 
15848  // (tbz (srl x, c), b) -> (tbz x, b+c)
15849  case ISD::SRL:
15850  if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
15851  Bit = Bit + C->getZExtValue();
15852  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
15853  }
15854  return Op;
15855 
15856  // (tbz (xor x, -1), b) -> (tbnz x, b)
15857  case ISD::XOR:
15858  if ((C->getZExtValue() >> Bit) & 1)
15859  Invert = !Invert;
15860  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
15861  }
15862 }
15863 
15864 // Optimize test single bit zero/non-zero and branch.
15865 static SDValue performTBZCombine(SDNode *N,
15866  TargetLowering::DAGCombinerInfo &DCI,
15867  SelectionDAG &DAG) {
15868  unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
15869  bool Invert = false;
15870  SDValue TestSrc = N->getOperand(1);
15871  SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
15872 
15873  if (TestSrc == NewTestSrc)
15874  return SDValue();
15875 
15876  unsigned NewOpc = N->getOpcode();
15877  if (Invert) {
15878  if (NewOpc == AArch64ISD::TBZ)
15879  NewOpc = AArch64ISD::TBNZ;
15880  else {
15881  assert(NewOpc == AArch64ISD::TBNZ);
15882  NewOpc = AArch64ISD::TBZ;
15883  }
15884  }
15885 
15886  SDLoc DL(N);
15887  return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
15888  DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
15889 }
15890 
15891 // vselect (v1i1 setcc) ->
15892 // vselect (v1iXX setcc) (XX is the size of the compared operand type)
15893 // FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
15894 // condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
15895 // such VSELECT.
15896 static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
15897  SDValue N0 = N->getOperand(0);
15898  EVT CCVT = N0.getValueType();
15899 
15900  // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
15901  // into (OR (ASR lhs, N-1), 1), which requires less instructions for the
15902  // supported types.
15903  SDValue SetCC = N->getOperand(0);
15904  if (SetCC.getOpcode() == ISD::SETCC &&
15905  SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
15906  SDValue CmpLHS = SetCC.getOperand(0);
15907  EVT VT = CmpLHS.getValueType();
15908  SDNode *CmpRHS = SetCC.getOperand(1).getNode();
15909  SDNode *SplatLHS = N->getOperand(1).getNode();
15910  SDNode *SplatRHS = N->getOperand(2).getNode();
15911  APInt SplatLHSVal;
15912  if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
15913  VT.isSimple() &&
15914  is_contained(
15915  makeArrayRef({MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
15916  MVT::v2i32, MVT::v4i32, MVT::v2i64}),
15917  VT.getSimpleVT().SimpleTy) &&
15918  ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
15919  SplatLHSVal.isOneValue() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
15920  ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
15921  unsigned NumElts = VT.getVectorNumElements();
15922  SmallVector<SDValue, 8> Ops(
15923  NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
15924  VT.getScalarType()));
15925  SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);
15926 
15927  auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
15928  auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
15929  return Or;
15930  }
15931  }
15932 
15933  if (N0.getOpcode() != ISD::SETCC ||
15934  CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
15935  CCVT.getVectorElementType() != MVT::i1)
15936  return SDValue();
15937 
15938  EVT ResVT = N->getValueType(0);
15939  EVT CmpVT = N0.getOperand(0).getValueType();
15940  // Only combine when the result type is of the same size as the compared
15941  // operands.
15942  if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
15943  return SDValue();
15944 
15945  SDValue IfTrue = N->getOperand(1);
15946  SDValue IfFalse = N->getOperand(2);
15947  SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
15948  N0.getOperand(0), N0.getOperand(1),
15949  cast<CondCodeSDNode>(N0.getOperand(2))->get());
15950  return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
15951  IfTrue, IfFalse);
15952 }
15953 
15954 /// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
15955 /// the compare-mask instructions rather than going via NZCV, even if LHS and
15956 /// RHS are really scalar. This replaces any scalar setcc in the above pattern
15957 /// with a vector one followed by a DUP shuffle on the result.
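/// For example (illustrative): for "select (setcc f64 a, b, olt), v2i64 t, f",
/// a and b are placed in lane 0 of v2f64 vectors and compared with a vector
/// setcc; the lane-0 mask is then duplicated across both lanes and drives an
/// ordinary VSELECT.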
15958 static SDValue performSelectCombine(SDNode *N,
15959  TargetLowering::DAGCombinerInfo &DCI) {
15960  SelectionDAG &DAG = DCI.DAG;
15961  SDValue N0 = N->getOperand(0);
15962  EVT ResVT = N->getValueType(0);
15963 
15964  if (N0.getOpcode() != ISD::SETCC)
15965  return SDValue();
15966 
15967  if (ResVT.isScalableVector())
15968  return SDValue();
15969 
15970  // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
15971  // scalar SetCCResultType. We also don't expect vectors, because we assume
15972  // that selects fed by vector SETCCs are canonicalized to VSELECT.
15973  assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
15974  "Scalar-SETCC feeding SELECT has unexpected result type!");
15975 
15976  // If NumMaskElts == 0, the comparison is larger than the select result. The
15977  // largest real NEON comparison is 64-bits per lane, which means the result is
15978  // at most 32-bits and an illegal vector. Just bail out for now.
15979  EVT SrcVT = N0.getOperand(0).getValueType();
15980 
15981  // Don't try to do this optimization when the setcc itself has i1 operands.
15982  // There are no legal vectors of i1, so this would be pointless.
15983  if (SrcVT == MVT::i1)
15984  return SDValue();
15985 
15986  int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
15987  if (!ResVT.isVector() || NumMaskElts == 0)
15988  return SDValue();
15989 
15990  SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
15991  EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
15992 
15993  // Also bail out if the vector CCVT isn't the same size as ResVT.
15994  // This can happen if the SETCC operand size doesn't divide the ResVT size
15995  // (e.g., f64 vs v3f32).
15996  if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
15997  return SDValue();
15998 
15999  // Make sure we didn't create illegal types, if we're not supposed to.
16000  assert(DCI.isBeforeLegalize() ||
16001  DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
16002 
16003  // First perform a vector comparison, where lane 0 is the one we're interested
16004  // in.
16005  SDLoc DL(N0);
16006  SDValue LHS =
16007  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
16008  SDValue RHS =
16009  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
16010  SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
16011 
16012  // Now duplicate the comparison mask we want across all other lanes.
16013  SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
16014  SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
16015  Mask = DAG.getNode(ISD::BITCAST, DL,
16016  ResVT.changeVectorElementTypeToInteger(), Mask);
16017 
16018  return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
16019 }
16020 
16021 /// Get rid of unnecessary NVCASTs (that don't change the type).
16022 static SDValue performNVCASTCombine(SDNode *N) {
16023  if (N->getValueType(0) == N->getOperand(0).getValueType())
16024  return N->getOperand(0);
16025 
16026  return SDValue();
16027 }
16028 
16029 // If all users of the globaladdr are of the form (globaladdr + constant), find
16030 // the smallest constant, fold it into the globaladdr's offset and rewrite the
16031 // globaladdr as (globaladdr + constant) - constant.
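// For example (illustrative): if every use is (g + 512) or (g + 528), g is
// rewritten as ((g' = g + 512) - 512); the ADD in each use then simplifies to
// g' + 0 or g' + 16, so the bulk of the offset is folded into the ADRP/ADD
// relocation and only small residual offsets remain.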
16032 static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
16033  const AArch64Subtarget *Subtarget,
16034  const TargetMachine &TM) {
16035  auto *GN = cast<GlobalAddressSDNode>(N);
16036  if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
16037  AArch64II::MO_NO_FLAG)
16038  return SDValue();
16039 
16040  uint64_t MinOffset = -1ull;
16041  for (SDNode *N : GN->uses()) {
16042  if (N->getOpcode() != ISD::ADD)
16043  return SDValue();
16044  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
16045  if (!C)
16046  C = dyn_cast<ConstantSDNode>(N->getOperand(1));
16047  if (!C)
16048  return SDValue();
16049  MinOffset = std::min(MinOffset, C->getZExtValue());
16050  }
16051  uint64_t Offset = MinOffset + GN->getOffset();
16052 
16053  // Require that the new offset is larger than the existing one. Otherwise, we
16054  // can end up oscillating between two possible DAGs, for example,
16055  // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
16056  if (Offset <= uint64_t(GN->getOffset()))
16057  return SDValue();
16058 
16059  // Check whether folding this offset is legal. It must not go out of bounds of
16060  // the referenced object to avoid violating the code model, and must be
16061  // smaller than 2^21 because this is the largest offset expressible in all
16062  // object formats.
16063  //
16064  // This check also prevents us from folding negative offsets, which will end
16065  // up being treated in the same way as large positive ones. They could also
16066  // cause code model violations, and aren't really common enough to matter.
16067  if (Offset >= (1 << 21))
16068  return SDValue();
16069 
16070  const GlobalValue *GV = GN->getGlobal();
16071  Type *T = GV->getValueType();
16072  if (!T->isSized() ||
16073  Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
16074  return SDValue();
16075 
16076  SDLoc DL(GN);
16077  SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
16078  return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
16079  DAG.getConstant(MinOffset, DL, MVT::i64));
16080 }
16081 
16082 // Turns the vector of indices into a vector of byte offsets by scaling Offset
16083 // by (BitWidth / 8).
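// For example (illustrative): with 32-bit elements, indices <0, 1, 2, ...>
// are shifted left by log2(32 / 8) == 2, producing byte offsets <0, 4, 8, ...>.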
16084 static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
16085  SDLoc DL, unsigned BitWidth) {
16086  assert(Offset.getValueType().isScalableVector() &&
16087  "This method is only for scalable vectors of offsets");
16088 
16089  SDValue Shift = DAG.getConstant(Log2_32(BitWidth / 8), DL, MVT::i64);
16090  SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift);
16091 
16092  return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
16093 }
16094 
16095 /// Check if the value of \p OffsetInBytes can be used as an immediate for
16096 /// the gather load/prefetch and scatter store instructions with vector base and
16097 /// immediate offset addressing mode:
16098 ///
16099 /// [<Zn>.[S|D]{, #<imm>}]
16100 ///
16101 /// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
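///
/// For example (illustrative): for 32-bit elements (sizeof(<T>) == 4) the
/// valid immediates are 0, 4, 8, ..., 124; an offset of 6 fails the multiple
/// check and 128 fails the range check, so both must use a register offset
/// instead.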
16102 inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
16103  unsigned ScalarSizeInBytes) {
16104  // The immediate is not a multiple of the scalar size.
16105  if (OffsetInBytes % ScalarSizeInBytes)
16106  return false;
16107 
16108  // The immediate is out of range.
16109  if (OffsetInBytes / ScalarSizeInBytes > 31)
16110  return false;
16111 
16112  return true;
16113 }
16114 
16115 /// Check if the value of \p Offset represents a valid immediate for the SVE
16116 /// gather load/prefetch and scatter store instructions with vector base and
16117 /// immediate offset addressing mode:
16118 ///
16119 /// [<Zn>.[S|D]{, #<imm>}]
16120 ///
16121 /// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
16122 static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
16123  unsigned ScalarSizeInBytes) {
16124  ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
16125  return OffsetConst && isValidImmForSVEVecImmAddrMode(
16126  OffsetConst->getZExtValue(), ScalarSizeInBytes);
16127 }
16128 
16129 static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
16130  unsigned Opcode,
16131  bool OnlyPackedOffsets = true) {
16132  const SDValue Src = N->getOperand(2);
16133  const EVT SrcVT = Src->getValueType(0);
16134  assert(SrcVT.isScalableVector() &&
16135  "Scatter stores are only possible for SVE vectors");
16136 
16137  SDLoc DL(N);
16138  MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
16139 
16140  // Make sure that source data will fit into an SVE register
16141  if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
16142  return SDValue();
16143 
16144  // For FPs, ACLE only supports _packed_ single and double precision types.
16145  if (SrcElVT.isFloatingPoint())
16146  if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
16147  return SDValue();
16148 
16149  // Depending on the addressing mode, this is either a pointer or a vector of
16150  // pointers (that fits into one register)
16151  SDValue Base = N->getOperand(4);
16152  // Depending on the addressing mode, this is either a single offset or a
16153  // vector of offsets (that fits into one register)
16154  SDValue Offset = N->getOperand(5);
16155 
16156  // For "scalar + vector of indices", just scale the indices. This only
16157  // applies to non-temporal scatters because there's no instruction that takes
16158  // indices.
16159  if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
16160  Offset =
16161  getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
16162  Opcode = AArch64ISD::SSTNT1_PRED;
16163  }
16164 
16165  // In the case of non-temporal scatter stores there's only one SVE instruction
16166  // per data-size: "scalar + vector", i.e.
16167  // * stnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
16168  // Since we do have intrinsics that allow the arguments to be in a different
16169  // order, we may need to swap them to match the spec.
16170  if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
16171  std::swap(Base, Offset);
16172 
16173  // SST1_IMM requires that the offset is an immediate that is:
16174  // * a multiple of #SizeInBytes,
16175  // * in the range [0, 31 x #SizeInBytes],
16176  // where #SizeInBytes is the size in bytes of the stored items. For
16177  // immediates outside that range and non-immediate scalar offsets use SST1 or
16178  // SST1_UXTW instead.
16179  if (Opcode == AArch64ISD::SST1_IMM_PRED) {
16180  if (!isValidImmForSVEVecImmAddrMode(Offset,
16181  SrcVT.getScalarSizeInBits() / 8)) {
16182  if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
16183  Opcode = AArch64ISD::SST1_UXTW_PRED;
16184  else
16185  Opcode = AArch64ISD::SST1_PRED;
16186 
16187  std::swap(Base, Offset);
16188  }
16189  }
16190 
16191  auto &TLI = DAG.getTargetLoweringInfo();
16192  if (!TLI.isTypeLegal(Base.getValueType()))
16193  return SDValue();
16194 
16195  // Some scatter store variants allow unpacked offsets, but only as nxv2i32
16196  // vectors. These are implicitly sign (sxtw) or zero (zxtw) extended to
16197  // nxv2i64. Legalize accordingly.
16198  if (!OnlyPackedOffsets &&
16199  Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
16200  Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::nxv2i64, Offset);
16201 
16202  if (!TLI.isTypeLegal(Offset.getValueType()))
16203  return SDValue();
16204 
16205  // Source value type that is representable in hardware
16206  EVT HwSrcVt = getSVEContainerType(SrcVT);
16207 
16208  // Keep the original type of the input data to store - this is needed to be
16209  // able to select the correct instruction, e.g. ST1B, ST1H, ST1W and ST1D. For
16210  // FP values we want the integer equivalent, so just use HwSrcVt.
16211  SDValue InputVT = DAG.getValueType(SrcVT);
16212  if (SrcVT.isFloatingPoint())
16213  InputVT = DAG.getValueType(HwSrcVt);
16214 
16215  SDVTList VTs = DAG.getVTList(MVT::Other);
16216  SDValue SrcNew;
16217 
16218  if (Src.getValueType().isFloatingPoint())
16219  SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
16220  else
16221  SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
16222 
16223  SDValue Ops[] = {N->getOperand(0), // Chain
16224  SrcNew,
16225  N->getOperand(3), // Pg
16226  Base,
16227  Offset,
16228  InputVT};
16229 
16230  return DAG.getNode(Opcode, DL, VTs, Ops);
16231 }
16232 
16233 static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
16234  unsigned Opcode,
16235  bool OnlyPackedOffsets = true) {
16236  const EVT RetVT = N->getValueType(0);
16237  assert(RetVT.isScalableVector() &&
16238  "Gather loads are only possible for SVE vectors");
16239 
16240  SDLoc DL(N);
16241 
16242  // Make sure that the loaded data will fit into an SVE register
16243  if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
16244  return SDValue();
16245 
16246  // Depending on the addressing mode, this is either a pointer or a vector of
16247  // pointers (that fits into one register)
16248  SDValue Base = N->getOperand(3);
16249  // Depending on the addressing mode, this is either a single offset or a
16250  // vector of offsets (that fits into one register)
16251  SDValue Offset = N->getOperand(4);
16252 
16253  // For "scalar + vector of indices", just scale the indices. This only
16254  // applies to non-temporal gathers because there's no instruction that takes
16255  // indices.
16256  if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
16257  Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
16258  RetVT.getScalarSizeInBits());
16259  Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
16260  }
16261 
16262  // In the case of non-temporal gather loads there's only one SVE instruction
16263  // per data-size: "scalar + vector", i.e.
16264  // * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
16265  // Since we do have intrinsics that allow the arguments to be in a different
16266  // order, we may need to swap them to match the spec.
16267  if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
16268  Offset.getValueType().isVector())
16269  std::swap(Base, Offset);
16270 
16271  // GLD{FF}1_IMM requires that the offset is an immediate that is:
16272  // * a multiple of #SizeInBytes,
16273  // * in the range [0, 31 x #SizeInBytes],
16274  // where #SizeInBytes is the size in bytes of the loaded items. For
16275  // immediates outside that range and non-immediate scalar offsets use
16276  // GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
16277  if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
16278  Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
16279  if (!isValidImmForSVEVecImmAddrMode(Offset,
16280  RetVT.getScalarSizeInBits() / 8)) {
16281  if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
16282  Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
16283  ? AArch64ISD::GLD1_UXTW_MERGE_ZERO
16284  : AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
16285  else
16286  Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
16287  ? AArch64ISD::GLD1_MERGE_ZERO
16288  : AArch64ISD::GLDFF1_MERGE_ZERO;
16289 
16290  std::swap(Base, Offset);
16291  }
16292  }
16293 
16294  auto &TLI = DAG.getTargetLoweringInfo();
16295  if (!TLI.isTypeLegal(Base.getValueType()))
16296  return SDValue();
16297 
16298  // Some gather load variants allow unpacked offsets, but only as nxv2i32
16299  // vectors. These are implicitly sign (sxtw) or zero (zxtw) extended to
16300  // nxv2i64. Legalize accordingly.
16301  if (!OnlyPackedOffsets &&
16302  Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
16303  Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::nxv2i64, Offset);
16304 
16305  // Return value type that is representable in hardware
16306  EVT HwRetVt = getSVEContainerType(RetVT);
16307 
16308  // Keep the original output value type around - this is needed to be able to
16309  // select the correct instruction, e.g. LD1B, LD1H, LD1W and LD1D. For FP
16310  // values we want the integer equivalent, so just use HwRetVT.
16311  SDValue OutVT = DAG.getValueType(RetVT);
16312  if (RetVT.isFloatingPoint())
16313  OutVT = DAG.getValueType(HwRetVt);
16314 
16315  SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
16316  SDValue Ops[] = {N->getOperand(0), // Chain
16317  N->getOperand(2), // Pg
16318  Base, Offset, OutVT};
16319 
16320  SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
16321  SDValue LoadChain = SDValue(Load.getNode(), 1);
16322 
16323  if (RetVT.isInteger() && (RetVT != HwRetVt))
16324  Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));
16325 
16326  // If the original return value was FP, bitcast accordingly. Doing it here
16327  // means that we can avoid adding TableGen patterns for FPs.
16328  if (RetVT.isFloatingPoint())
16329  Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));
16330 
16331  return DAG.getMergeValues({Load, LoadChain}, DL);
16332 }
16333 
16334 static SDValue
16335 performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16336  SelectionDAG &DAG) {
16337  SDLoc DL(N);
16338  SDValue Src = N->getOperand(0);
16339  unsigned Opc = Src->getOpcode();
16340 
16341  // Sign extend of an unsigned unpack -> signed unpack
16342  if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
16343 
16344  unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
16345  : AArch64ISD::SUNPKLO;
16346 
16347  // Push the sign extend to the operand of the unpack
16348  // This is necessary where, for example, the operand of the unpack
16349  // is another unpack:
16350  // 4i32 sign_extend_inreg (4i32 uunpklo(8i16 uunpklo (16i8 opnd)), from 4i8)
16351  // ->
16352  // 4i32 sunpklo (8i16 sign_extend_inreg(8i16 uunpklo (16i8 opnd), from 8i8)
16353  // ->
16354  // 4i32 sunpklo(8i16 sunpklo(16i8 opnd))
16355  SDValue ExtOp = Src->getOperand(0);
16356  auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
16357  EVT EltTy = VT.getVectorElementType();
16358  (void)EltTy;
16359 
16360  assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
16361  "Sign extending from an invalid type");
16362 
16363  EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
16364 
16364 
16365  SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
16366  ExtOp, DAG.getValueType(ExtVT));
16367 
16368  return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
16369  }
16370 
16371  if (DCI.isBeforeLegalizeOps())
16372  return SDValue();
16373 
16374  if (!EnableCombineMGatherIntrinsics)
16375  return SDValue();
16376 
16377  // SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
16378  // for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
16379  unsigned NewOpc;
16380  unsigned MemVTOpNum = 4;
16381  switch (Opc) {
16382  case AArch64ISD::LD1_MERGE_ZERO:
16383  NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
16384  MemVTOpNum = 3;
16385  break;
16386  case AArch64ISD::LDNF1_MERGE_ZERO:
16387  NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
16388  MemVTOpNum = 3;
16389  break;
16390  case AArch64ISD::LDFF1_MERGE_ZERO:
16391  NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
16392  MemVTOpNum = 3;
16393  break;
16394  case AArch64ISD::GLD1_MERGE_ZERO:
16395  NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
16396  break;
16397  case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
16398  NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
16399  break;
16400  case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
16401  NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
16402  break;
16403  case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
16404  NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
16405  break;
16406  case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
16407  NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
16408  break;
16409  case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
16410  NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
16411  break;
16412  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
16413  NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
16414  break;
16415  case AArch64ISD::GLDFF1_MERGE_ZERO:
16416  NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
16417  break;
16418  case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
16419  NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
16420  break;
16421  case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
16422  NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
16423  break;
16424  case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
16425  NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
16426  break;
16427  case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
16428  NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
16429  break;
16430  case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
16431  NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
16432  break;
16433  case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
16434  NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
16435  break;
16436  case AArch64ISD::GLDNT1_MERGE_ZERO:
16437  NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
16438  break;
16439  default:
16440  return SDValue();
16441  }
16442 
16443  EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16444  EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
16445 
16446  if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
16447  return SDValue();
16448 
16449  EVT DstVT = N->getValueType(0);
16450  SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
16451 
16452  SmallVector<SDValue, 5> Ops;
16453  for (unsigned I = 0; I < Src->getNumOperands(); ++I)
16454  Ops.push_back(Src->getOperand(I));
16455 
16456  SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
16457  DCI.CombineTo(N, ExtLoad);
16458  DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));
16459 
16460  // Return N so it doesn't get rechecked
16461  return SDValue(N, 0);
16462 }
16463 
16464 /// Legalize the gather prefetch (scalar + vector addressing mode) when the
16465 /// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
16466 /// != nxv2i32) do not need legalization.
16467 static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) {
16468  const unsigned OffsetPos = 4;
16469  SDValue Offset = N->getOperand(OffsetPos);
16470 
16471  // Not an unpacked vector, bail out.
16472  if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
16473  return SDValue();
16474 
16475  // Extend the unpacked offset vector to 64-bit lanes.
16476  SDLoc DL(N);
16477  Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
16478  SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
16479  // Replace the offset operand with the 64-bit one.
16480  Ops[OffsetPos] = Offset;
16481 
16482  return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
16483 }
16484 
16485 /// Combines a node carrying the intrinsic
16486 /// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
16487 /// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
16488 /// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
16489 /// sve gather prefetch instruction with vector plus immediate addressing mode.
16490 static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
16491  unsigned ScalarSizeInBytes) {
16492  const unsigned ImmPos = 4, OffsetPos = 3;
16493  // No need to combine the node if the immediate is valid...
16494  if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
16495  return SDValue();
16496 
16497  // ...otherwise swap the offset base with the offset...
16498  SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
16499  std::swap(Ops[ImmPos], Ops[OffsetPos]);
16500  // ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
16501  // `aarch64_sve_prfb_gather_uxtw_index`.
16502  SDLoc DL(N);
16503  Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
16504  MVT::i64);
16505 
16506  return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
16507 }
16508 
16509 // Return true if the vector operation can guarantee only the first lane of its
16510 // result contains data, with all bits in other lanes set to zero.
16511 static bool isLanes1toNKnownZero(SDValue Op) {
16512  switch (Op.getOpcode()) {
16513  default:
16514  return false;
16515  case AArch64ISD::ANDV_PRED:
16516  case AArch64ISD::EORV_PRED:
16517  case AArch64ISD::FADDA_PRED:
16518  case AArch64ISD::FADDV_PRED:
16519  case AArch64ISD::FMAXNMV_PRED:
16520  case AArch64ISD::FMAXV_PRED:
16521  case AArch64ISD::FMINNMV_PRED:
16522  case AArch64ISD::FMINV_PRED:
16523  case AArch64ISD::ORV_PRED:
16524  case AArch64ISD::SADDV_PRED:
16525  case AArch64ISD::SMAXV_PRED:
16526  case AArch64ISD::SMINV_PRED:
16527  case AArch64ISD::UADDV_PRED:
16528  case AArch64ISD::UMAXV_PRED:
16529  case AArch64ISD::UMINV_PRED:
16530  return true;
16531  }
16532 }
16533 
16534 static SDValue removeRedundantInsertVectorElt(SDNode *N) {
16535  assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
16536  SDValue InsertVec = N->getOperand(0);
16537  SDValue InsertElt = N->getOperand(1);
16538  SDValue InsertIdx = N->getOperand(2);
16539 
16540  // We only care about inserts into the first element...
16541  if (!isNullConstant(InsertIdx))
16542  return SDValue();
16543  // ...of a zero'd vector...
16544  if (!ISD::isConstantSplatVectorAllZeros(InsertVec.getNode()))
16545  return SDValue();
16546  // ...where the inserted data was previously extracted...
16547  if (InsertElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16548  return SDValue();
16549 
16550  SDValue ExtractVec = InsertElt.getOperand(0);
16551  SDValue ExtractIdx = InsertElt.getOperand(1);
16552 
16553  // ...from the first element of a vector.
16554  if (!isNullConstant(ExtractIdx))
16555  return SDValue();
16556 
16557  // If we get here we are effectively trying to zero lanes 1-N of a vector.
16558 
16559  // Ensure there's no type conversion going on.
16560  if (N->getValueType(0) != ExtractVec.getValueType())
16561  return SDValue();
16562 
16563  if (!isLanes1toNKnownZero(ExtractVec))
16564  return SDValue();
16565 
16566  // The explicit zeroing is redundant.
16567  return ExtractVec;
16568 }
16569 
16570 static SDValue
16571 performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16572  if (SDValue Res = removeRedundantInsertVectorElt(N))
16573  return Res;
16574 
16575  return performPostLD1Combine(N, DCI, true);
16576 }
16577 
16578 static SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
16579  EVT Ty = N->getValueType(0);
16580  if (Ty.isInteger())
16581  return SDValue();
16582 
16583  EVT IntTy = Ty.changeVectorElementTypeToInteger();
16584  EVT ExtIntTy = getPackedSVEVectorVT(IntTy.getVectorElementCount());
16585  if (ExtIntTy.getVectorElementType().getScalarSizeInBits() <
16586  IntTy.getVectorElementType().getScalarSizeInBits())
16587  return SDValue();
16588 
16589  SDLoc DL(N);
16590  SDValue LHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(0)),
16591  DL, ExtIntTy);
16592  SDValue RHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(1)),
16593  DL, ExtIntTy);
16594  SDValue Idx = N->getOperand(2);
16595  SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ExtIntTy, LHS, RHS, Idx);
16596  SDValue Trunc = DAG.getAnyExtOrTrunc(Splice, DL, IntTy);
16597  return DAG.getBitcast(Ty, Trunc);
16598 }
16599 
16600 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
16601  DAGCombinerInfo &DCI) const {
16602  SelectionDAG &DAG = DCI.DAG;
16603  switch (N->getOpcode()) {
16604  default:
16605  LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
16606  break;
16607  case ISD::ADD:
16608  case ISD::SUB:
16609  return performAddSubCombine(N, DCI, DAG);
16610  case ISD::XOR:
16611  return performXorCombine(N, DAG, DCI, Subtarget);
16612  case ISD::MUL:
16613  return performMulCombine(N, DAG, DCI, Subtarget);
16614  case ISD::SINT_TO_FP:
16615  case ISD::UINT_TO_FP:
16616  return performIntToFpCombine(N, DAG, Subtarget);
16617  case ISD::FP_TO_SINT:
16618  case ISD::FP_TO_UINT:
16619  return performFpToIntCombine(N, DAG, DCI, Subtarget);
16620  case ISD::FDIV:
16621  return performFDivCombine(N, DAG, DCI, Subtarget);
16622  case ISD::OR:
16623  return performORCombine(N, DCI, Subtarget);
16624  case ISD::AND:
16625  return performANDCombine(N, DCI);
16626  case ISD::SRL:
16627  return performSRLCombine(N, DCI);
16628  case ISD::INTRINSIC_WO_CHAIN:
16629  return performIntrinsicCombine(N, DCI, Subtarget);
16630  case ISD::ANY_EXTEND:
16631  case ISD::ZERO_EXTEND:
16632  case ISD::SIGN_EXTEND:
16633  return performExtendCombine(N, DCI, DAG);
16634  case ISD::SIGN_EXTEND_INREG:
16635  return performSignExtendInRegCombine(N, DCI, DAG);
16636  case ISD::TRUNCATE:
16637  return performVectorTruncateCombine(N, DCI, DAG);
16638  case ISD::CONCAT_VECTORS:
16639  return performConcatVectorsCombine(N, DCI, DAG);
16640  case ISD::SELECT:
16641  return performSelectCombine(N, DCI);
16642  case ISD::VSELECT:
16643  return performVSelectCombine(N, DCI.DAG);
16644  case ISD::SETCC:
16645  return performSETCCCombine(N, DAG);
16646  case ISD::LOAD:
16647  if (performTBISimplification(N->getOperand(1), DCI, DAG))
16648  return SDValue(N, 0);
16649  break;
16650  case ISD::STORE:
16651  return performSTORECombine(N, DCI, DAG, Subtarget);
16652  case ISD::VECTOR_SPLICE:
16653  return performSVESpliceCombine(N, DAG);
16654  case AArch64ISD::BRCOND:
16655  return performBRCONDCombine(N, DCI, DAG);
16656  case AArch64ISD::TBNZ:
16657  case AArch64ISD::TBZ:
16658  return performTBZCombine(N, DCI, DAG);
16659  case AArch64ISD::CSEL:
16660  return performCSELCombine(N, DCI, DAG);
16661  case AArch64ISD::DUP:
16662  return performPostLD1Combine(N, DCI, false);
16663  case AArch64ISD::NVCAST:
16664  return performNVCASTCombine(N);
16665  case AArch64ISD::SPLICE:
16666  return performSpliceCombine(N, DAG);
16667  case AArch64ISD::UZP1:
16668  return performUzpCombine(N, DAG);
16669  case AArch64ISD::SETCC_MERGE_ZERO:
16670  return performSetccMergeZeroCombine(N, DAG);
16671  case AArch64ISD::GLD1_MERGE_ZERO:
16672  case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
16673  case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
16674  case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
16675  case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
16676  case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
16677  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
16678  case AArch64ISD::GLD1S_MERGE_ZERO:
16679  case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
16680  case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
16681  case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
16682  case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
16683  case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
16684  case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
16685  return performGLD1Combine(N, DAG);
16686  case AArch64ISD::VASHR:
16687  case AArch64ISD::VLSHR:
16688  return performVectorShiftCombine(N, *this, DCI);
16689  case ISD::INSERT_VECTOR_ELT:
16690  return performInsertVectorEltCombine(N, DCI);
16691  case ISD::EXTRACT_VECTOR_ELT:
16692  return performExtractVectorEltCombine(N, DAG);
16693  case ISD::VECREDUCE_ADD:
16694  return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
16695  case ISD::INTRINSIC_VOID:
16696  case ISD::INTRINSIC_W_CHAIN:
16697  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
16698  case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
16699  return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
16700  case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
16701  return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
16702  case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
16703  return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
16704  case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
16705  return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
16706  case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
16707  case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
16708  case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
16709  case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
16710  case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
16711  case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
16712  case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
16713  case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
16714  return legalizeSVEGatherPrefetchOffsVec(N, DAG);
16715  case Intrinsic::aarch64_neon_ld2:
16716  case Intrinsic::aarch64_neon_ld3:
16717  case Intrinsic::aarch64_neon_ld4:
16718  case Intrinsic::aarch64_neon_ld1x2:
16719  case Intrinsic::aarch64_neon_ld1x3:
16720  case Intrinsic::aarch64_neon_ld1x4:
16721  case Intrinsic::aarch64_neon_ld2lane:
16722  case Intrinsic::aarch64_neon_ld3lane:
16723  case Intrinsic::aarch64_neon_ld4lane:
16724  case Intrinsic::aarch64_neon_ld2r:
16725  case Intrinsic::aarch64_neon_ld3r:
16726  case Intrinsic::aarch64_neon_ld4r:
16727  case Intrinsic::aarch64_neon_st2:
16728  case Intrinsic::aarch64_neon_st3:
16729  case Intrinsic::aarch64_neon_st4:
16730  case Intrinsic::aarch64_neon_st1x2:
16731  case Intrinsic::aarch64_neon_st1x3:
16732  case Intrinsic::aarch64_neon_st1x4:
16733  case Intrinsic::aarch64_neon_st2lane:
16734  case Intrinsic::aarch64_neon_st3lane:
16735  case Intrinsic::aarch64_neon_st4lane:
16736  return performNEONPostLDSTCombine(N, DCI, DAG);
16737  case Intrinsic::aarch64_sve_ldnt1:
16738  return performLDNT1Combine(N, DAG);
16739  case Intrinsic::aarch64_sve_ld1rq:
16740  return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
16741  case Intrinsic::aarch64_sve_ld1ro:
16742  return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
16743  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
16744  return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
16745  case Intrinsic::aarch64_sve_ldnt1_gather:
16746  return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
16747  case Intrinsic::aarch64_sve_ldnt1_gather_index:
16748  return performGatherLoadCombine(N, DAG,
16749  AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
16750  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
16751  return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
16752  case Intrinsic::aarch64_sve_ld1:
16753  return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO);
16754  case Intrinsic::aarch64_sve_ldnf1:
16755  return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO);
16756  case Intrinsic::aarch64_sve_ldff1:
16757  return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO);
16758  case Intrinsic::aarch64_sve_st1:
16759  return performST1Combine(N, DAG);
16760  case Intrinsic::aarch64_sve_stnt1:
16761  return performSTNT1Combine(N, DAG);
16762  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
16763  return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
16764  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
16765  return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
16766  case Intrinsic::aarch64_sve_stnt1_scatter:
16767  return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
16768  case Intrinsic::aarch64_sve_stnt1_scatter_index:
16769  return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
16770  case Intrinsic::aarch64_sve_ld1_gather:
16771  return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
16772  case Intrinsic::aarch64_sve_ld1_gather_index:
16773  return performGatherLoadCombine(N, DAG,
16774  AArch64ISD::GLD1_SCALED_MERGE_ZERO);
16775  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
16776  return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_MERGE_ZERO,
16777  /*OnlyPackedOffsets=*/false);
16778  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
16779  return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO,
16780  /*OnlyPackedOffsets=*/false);
16781  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
16782  return performGatherLoadCombine(N, DAG,
16783  AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
16784  /*OnlyPackedOffsets=*/false);
16785  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
16786  return performGatherLoadCombine(N, DAG,
16787  AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
16788  /*OnlyPackedOffsets=*/false);
16789  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
16790  return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO);
16791  case Intrinsic::aarch64_sve_ldff1_gather:
16792  return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO);
16793  case Intrinsic::aarch64_sve_ldff1_gather_index:
16794  return performGatherLoadCombine(N, DAG,
16795  AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
16796  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
16797  return performGatherLoadCombine(N, DAG,
16798  AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
16799  /*OnlyPackedOffsets=*/false);
16800  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
16801  return performGatherLoadCombine(N, DAG,
16802  AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
16803  /*OnlyPackedOffsets=*/false);
16804  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
16805  return performGatherLoadCombine(N, DAG,
16806  AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
16807  /*OnlyPackedOffsets=*/false);
16808  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
16809  return performGatherLoadCombine(N, DAG,
16810  AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
16811  /*OnlyPackedOffsets=*/false);
16812  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
16813  return performGatherLoadCombine(N, DAG,
16814  AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
16815  case Intrinsic::aarch64_sve_st1_scatter:
16816  return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
16817  case Intrinsic::aarch64_sve_st1_scatter_index:
16818  return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED);
16819  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
16820  return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED,
16821  /*OnlyPackedOffsets=*/false);
16822  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
16823  return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED,
16824  /*OnlyPackedOffsets=*/false);
16825  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
16826  return performScatterStoreCombine(N, DAG,
16827  AArch64ISD::SST1_SXTW_SCALED_PRED,
16828  /*OnlyPackedOffsets=*/false);
16829  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
16830  return performScatterStoreCombine(N, DAG,
16831  AArch64ISD::SST1_UXTW_SCALED_PRED,
16832  /*OnlyPackedOffsets=*/false);
16833  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
16834  return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
16835  case Intrinsic::aarch64_sve_tuple_get: {
16836  SDLoc DL(N);
16837  SDValue Chain = N->getOperand(0);
16838  SDValue Src1 = N->getOperand(2);
16839  SDValue Idx = N->getOperand(3);
16840 
16841  uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
16842  EVT ResVT = N->getValueType(0);
16843  uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
16844  SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
16845  SDValue Val =
16846  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
16847  return DAG.getMergeValues({Val, Chain}, DL);
16848  }
16849  case Intrinsic::aarch64_sve_tuple_set: {
16850  SDLoc DL(N);
16851  SDValue Chain = N->getOperand(0);
16852  SDValue Tuple = N->getOperand(2);
16853  SDValue Idx = N->getOperand(3);
16854  SDValue Vec = N->getOperand(4);
16855 
16856  EVT TupleVT = Tuple.getValueType();
16857  uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
16858 
16859  uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
16860  uint64_t NumLanes =
16861  Vec.getValueType().getVectorElementCount().getKnownMinValue();
16862 
16863  if ((TupleLanes % NumLanes) != 0)
16864  report_fatal_error("invalid tuple vector!");
16865 
16866  uint64_t NumVecs = TupleLanes / NumLanes;
16867 
16868  SmallVector<SDValue, 4> Opnds;
16869  for (unsigned I = 0; I < NumVecs; ++I) {
16870  if (I == IdxConst)
16871  Opnds.push_back(Vec);
16872  else {
16873  SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
16874  Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
16875  Vec.getValueType(), Tuple, ExtIdx));
16876  }
16877  }
16878  SDValue Concat =
16879  DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
16880  return DAG.getMergeValues({Concat, Chain}, DL);
16881  }
16882  case Intrinsic::aarch64_sve_tuple_create2:
16883  case Intrinsic::aarch64_sve_tuple_create3:
16884  case Intrinsic::aarch64_sve_tuple_create4: {
16885  SDLoc DL(N);
16886  SDValue Chain = N->getOperand(0);
16887 
16888  SmallVector<SDValue, 4> Opnds;
16889  for (unsigned I = 2; I < N->getNumOperands(); ++I)
16890  Opnds.push_back(N->getOperand(I));
16891 
16892  EVT VT = Opnds[0].getValueType();
16893  EVT EltVT = VT.getVectorElementType();
16894  EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
16895  VT.getVectorElementCount() *
16896  (N->getNumOperands() - 2));
16897  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
16898  return DAG.getMergeValues({Concat, Chain}, DL);
16899  }
16900  case Intrinsic::aarch64_sve_ld2:
16901  case Intrinsic::aarch64_sve_ld3:
16902  case Intrinsic::aarch64_sve_ld4: {
16903  SDLoc DL(N);
16904  SDValue Chain = N->getOperand(0);
16905  SDValue Mask = N->getOperand(2);
16906  SDValue BasePtr = N->getOperand(3);
16907  SDValue LoadOps[] = {Chain, Mask, BasePtr};
16908  unsigned IntrinsicID =
16909  cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
16910  SDValue Result =
16911  LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
16912  return DAG.getMergeValues({Result, Chain}, DL);
16913  }
16914  case Intrinsic::aarch64_rndr:
16915  case Intrinsic::aarch64_rndrrs: {
16916  unsigned IntrinsicID =
16917  cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
16918  auto Register =
16919  (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
16920  : AArch64SysReg::RNDRRS);
16921  SDLoc DL(N);
16922  SDValue A = DAG.getNode(
16923  AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
16924  N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
16925  SDValue B = DAG.getNode(
16926  AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
16927  DAG.getConstant(0, DL, MVT::i32),
16928  DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1));
16929  return DAG.getMergeValues(
16930  {A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
16931  }
16932  default:
16933  break;
16934  }
16935  break;
16936  case ISD::GlobalAddress:
16937  return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
16938  }
16939  return SDValue();
16940 }
16941 
16942 // Check if the return value is used as only a return value, as otherwise
16943 // we can't perform a tail-call. In particular, we need to check for
16944 // target ISD nodes that are returns and any other "odd" constructs
16945 // that the generic analysis code won't necessarily catch.
16946 bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
16947  SDValue &Chain) const {
16948  if (N->getNumValues() != 1)
16949  return false;
16950  if (!N->hasNUsesOfValue(1, 0))
16951  return false;
16952 
16953  SDValue TCChain = Chain;
16954  SDNode *Copy = *N->use_begin();
16955  if (Copy->getOpcode() == ISD::CopyToReg) {
16956  // If the copy has a glue operand, we conservatively assume it isn't safe to
16957  // perform a tail call.
16958  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
16959  MVT::Glue)
16960  return false;
16961  TCChain = Copy->getOperand(0);
16962  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
16963  return false;
16964 
16965  bool HasRet = false;
16966  for (SDNode *Node : Copy->uses()) {
16967  if (Node->getOpcode() != AArch64ISD::RET_FLAG)
16968  return false;
16969  HasRet = true;
16970  }
16971 
16972  if (!HasRet)
16973  return false;
16974 
16975  Chain = TCChain;
16976  return true;
16977 }
16978 
16979 // Return whether an instruction can potentially be optimized to a tail
16980 // call. This will cause the optimizers to attempt to move, or duplicate,
16981 // return instructions to help enable tail call optimizations for this
16982 // instruction.
16983 bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16984  return CI->isTailCall();
16985 }
16986 
16987 bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
16988  SDValue &Offset,
16989  ISD::MemIndexedMode &AM,
16990  bool &IsInc,
16991  SelectionDAG &DAG) const {
16992  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
16993  return false;
16994 
16995  Base = Op->getOperand(0);
16996  // All of the indexed addressing mode instructions take a signed
16997  // 9 bit immediate offset.
16998  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
16999  int64_t RHSC = RHS->getSExtValue();
17000  if (Op->getOpcode() == ISD::SUB)
17001  RHSC = -(uint64_t)RHSC;
17002  if (!isInt<9>(RHSC))
17003  return false;
17004  IsInc = (Op->getOpcode() == ISD::ADD);
17005  Offset = Op->getOperand(1);
17006  return true;
17007  }
17008  return false;
17009 }
17010 
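// Pre-indexed addressing writes the updated base back before the access; the
// signed 9-bit immediate restriction checked above applies to both the pre-
// and post-indexed forms.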
17011 bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
17012  SDValue &Offset,
17013  ISD::MemIndexedMode &AM,
17014  SelectionDAG &DAG) const {
17015  EVT VT;
17016  SDValue Ptr;
17017  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
17018  VT = LD->getMemoryVT();
17019  Ptr = LD->getBasePtr();
17020  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
17021  VT = ST->getMemoryVT();
17022  Ptr = ST->getBasePtr();
17023  } else
17024  return false;
17025 
17026  bool IsInc;
17027  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
17028  return false;
17029  AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
17030  return true;
17031 }
17032 
17033 bool AArch64TargetLowering::getPostIndexedAddressParts(
17034  SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
17035  ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
17036  EVT VT;
17037  SDValue Ptr;
17038  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
17039  VT = LD->getMemoryVT();
17040  Ptr = LD->getBasePtr();
17041  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
17042  VT = ST->getMemoryVT();
17043  Ptr = ST->getBasePtr();
17044  } else
17045  return false;
17046 
17047  bool IsInc;
17048  if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
17049  return false;
17050  // Post-indexing updates the base, so it's not a valid transform
17051  // if that's not the same as the load's pointer.
17052  if (Ptr != Base)
17053  return false;
17054  AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
17055  return true;
17056 }
17057 
17058 void AArch64TargetLowering::ReplaceBITCASTResults(
17059  SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
17060  SDLoc DL(N);
17061  SDValue Op = N->getOperand(0);
17062  EVT VT = N->getValueType(0);
17063  EVT SrcVT = Op.getValueType();
17064 
17065  if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
17066  assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17067  "Expected fp->int bitcast!");
17068  SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
17069  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
17070  return;
17071  }
17072 
17073  if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
17074  return;
17075 
17076  Op = SDValue(
17077  DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
17078  DAG.getUNDEF(MVT::i32), Op,
17079  DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
17080  0);
17081  Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
17082  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
17083 }
17084 
17085 static void ReplaceReductionResults(SDNode *N,
17086  SmallVectorImpl<SDValue> &Results,
17087  SelectionDAG &DAG, unsigned InterOp,
17088  unsigned AcrossOp) {
17089  EVT LoVT, HiVT;
17090  SDValue Lo, Hi;
17091  SDLoc dl(N);
17092  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
17093  std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
17094  SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
17095  SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
17096  Results.push_back(SplitVal);
17097 }
17098 
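// splitInt128 decomposes an i128 operand into its two i64 halves: a truncate
// for the low half and a truncate of a 64-bit logical shift right for the
// high half.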
17099 static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
17100  SDLoc DL(N);
17101  SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
17102  SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
17103  DAG.getNode(ISD::SRL, DL, MVT::i128, N,
17104  DAG.getConstant(64, DL, MVT::i64)));
17105  return std::make_pair(Lo, Hi);
17106 }
17107 
17108 void AArch64TargetLowering::ReplaceExtractSubVectorResults(
17109  SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
17110  SDValue In = N->getOperand(0);
17111  EVT InVT = In.getValueType();
17112 
17113  // Common code will handle these just fine.
17114  if (!InVT.isScalableVector() || !InVT.isInteger())
17115  return;
17116 
17117  SDLoc DL(N);
17118  EVT VT = N->getValueType(0);
17119 
17120  // The following checks bail if this is not a halving operation.
17121 
17122  ElementCount ResEC = VT.getVectorElementCount();
17123 
17124  if (InVT.getVectorElementCount() != (ResEC * 2))
17125  return;
17126 
17127  auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
17128  if (!CIndex)
17129  return;
17130 
17131  unsigned Index = CIndex->getZExtValue();
17132  if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
17133  return;
17134 
17135  unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
17136  EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
17137 
17138  SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
17139  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
17140 }
17141 
17142 // Create an even/odd pair of X registers holding integer value V.
17143 static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
17144  SDLoc dl(V.getNode());
17145  SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
17146  SDValue VHi = DAG.getAnyExtOrTrunc(
17147  DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
17148  dl, MVT::i64);
17149  if (DAG.getDataLayout().isBigEndian())
17150  std::swap (VLo, VHi);
17151  SDValue RegClass =
17152  DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
17153  SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
17154  SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
17155  const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
17156  return SDValue(
17157  DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
17158 }
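// 128-bit cmpxchg takes one of two paths: with LSE (or outlined atomics) it
// is lowered directly to a CASP variant on an even/odd X register pair;
// otherwise it becomes a CMP_SWAP_128 pseudo that is later expanded into an
// LL/SC loop.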
17159 
17160 static void ReplaceCMP_SWAP_128Results(SDNode *N,
17161  SmallVectorImpl<SDValue> &Results,
17162  SelectionDAG &DAG,
17163  const AArch64Subtarget *Subtarget) {
17164  assert(N->getValueType(0) == MVT::i128 &&
17165  "AtomicCmpSwap on types less than 128 should be legal");
17166 
17167  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
17168  if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
17169  // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
17170  // so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
17171  SDValue Ops[] = {
17172  createGPRPairNode(DAG, N->getOperand(2)), // Compare value
17173  createGPRPairNode(DAG, N->getOperand(3)), // Store value
17174  N->getOperand(1), // Ptr
17175  N->getOperand(0), // Chain in
17176  };
17177 
17178  unsigned Opcode;
17179  switch (MemOp->getMergedOrdering()) {
17180  case AtomicOrdering::Monotonic:
17181  Opcode = AArch64::CASPX;
17182  break;
17183  case AtomicOrdering::Acquire:
17184  Opcode = AArch64::CASPAX;
17185  break;
17186  case AtomicOrdering::Release:
17187  Opcode = AArch64::CASPLX;
17188  break;
17189  case AtomicOrdering::AcquireRelease:
17190  case AtomicOrdering::SequentiallyConsistent:
17191  Opcode = AArch64::CASPALX;
17192  break;
17193  default:
17194  llvm_unreachable("Unexpected ordering!");
17195  }
17196 
17197  MachineSDNode *CmpSwap = DAG.getMachineNode(
17198  Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
17199  DAG.setNodeMemRefs(CmpSwap, {MemOp});
17200 
17201  unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
17202  if (DAG.getDataLayout().isBigEndian())
17203  std::swap(SubReg1, SubReg2);
17204  SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
17205  SDValue(CmpSwap, 0));
17206  SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
17207  SDValue(CmpSwap, 0));
17208  Results.push_back(
17209  DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
17210  Results.push_back(SDValue(CmpSwap, 1)); // Chain out
17211  return;
17212  }
17213 
17214  unsigned Opcode;
17215  switch (MemOp->getMergedOrdering()) {
17216  case AtomicOrdering::Monotonic:
17217  Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
17218  break;
17219  case AtomicOrdering::Acquire:
17220  Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
17221  break;
17222  case AtomicOrdering::Release:
17223  Opcode = AArch64::CMP_SWAP_128_RELEASE;
17224  break;
17225  case AtomicOrdering::AcquireRelease:
17226  case AtomicOrdering::SequentiallyConsistent:
17227  Opcode = AArch64::CMP_SWAP_128;
17228  break;
17229  default:
17230  llvm_unreachable("Unexpected ordering!");
17231  }
17232 
17233  auto Desired = splitInt128(N->getOperand(2), DAG);
17234  auto New = splitInt128(N->getOperand(3), DAG);
17235  SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
17236  New.first, New.second, N->getOperand(0)};
17237  SDNode *CmpSwap = DAG.getMachineNode(
17238  Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
17239  Ops);
17240  DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
17241 
17242  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
17243  SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
17244  Results.push_back(SDValue(CmpSwap, 3));
17245 }
17246 
17247 void AArch64TargetLowering::ReplaceNodeResults(
17248  SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
17249  switch (N->getOpcode()) {
17250  default:
17251  llvm_unreachable("Don't know how to custom expand this");
17252  case ISD::BITCAST:
17253  ReplaceBITCASTResults(N, Results, DAG);
17254  return;
17255  case ISD::VECREDUCE_ADD:
17256  case ISD::VECREDUCE_SMAX:
17257  case ISD::VECREDUCE_SMIN:
17258  case ISD::VECREDUCE_UMAX:
17259  case ISD::VECREDUCE_UMIN:
17260  Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
17261  return;
17262 
17263  case ISD::CTPOP:
17264  if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
17265  Results.push_back(Result);
17266  return;
17267  case AArch64ISD::SADDV:
17268  ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
17269  return;
17270  case AArch64ISD::UADDV:
17271  ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
17272  return;
17273  case AArch64ISD::SMINV:
17274  ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
17275  return;
17276  case AArch64ISD::UMINV:
17277  ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
17278  return;
17279  case AArch64ISD::SMAXV:
17280  ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
17281  return;
17282  case AArch64ISD::UMAXV:
17283  ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
17284  return;
17285  case ISD::FP_TO_UINT:
17286  case ISD::FP_TO_SINT:
17287  assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
17288  // Let normal code take care of it by not adding anything to Results.
17289  return;
17290  case ISD::ATOMIC_CMP_SWAP:
17291  ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
17292  return;
17293  case ISD::LOAD: {
17294  assert(SDValue(N, 0).getValueType() == MVT::i128 &&
17295  "unexpected load's value type");
17296  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
17297  if (!LoadNode->isVolatile() || LoadNode->getMemoryVT() != MVT::i128) {
17298  // Non-volatile loads are optimized later in AArch64's load/store
17299  // optimizer.
17300  return;
17301  }
17302 
17303  SDValue Result = DAG.getMemIntrinsicNode(
17304  AArch64ISD::LDP, SDLoc(N),
17305  DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
17306  {LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
17307  LoadNode->getMemOperand());
17308 
17309  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
17310  Result.getValue(0), Result.getValue(1));
17311  Results.append({Pair, Result.getValue(2) /* Chain */});
17312  return;
17313  }
17314  case ISD::EXTRACT_SUBVECTOR:
17315  ReplaceExtractSubVectorResults(N, Results, DAG);
17316  return;
17317  case ISD::INSERT_SUBVECTOR:
17318  // Custom lowering has been requested for INSERT_SUBVECTOR -- but delegate
17319  // to common code for result type legalisation
17320  return;
17321  case ISD::INTRINSIC_WO_CHAIN: {
17322  EVT VT = N->getValueType(0);
17323  assert((VT == MVT::i8 || VT == MVT::i16) &&
17324  "custom lowering for unexpected type");
17325 
17326  ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
17327  Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
17328  switch (IntID) {
17329  default:
17330  return;
17331  case Intrinsic::aarch64_sve_clasta_n: {
17332  SDLoc DL(N);
17333  auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
17334  auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
17335  N->getOperand(1), Op2, N->getOperand(3));
17336  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17337  return;
17338  }
17339  case Intrinsic::aarch64_sve_clastb_n: {
17340  SDLoc DL(N);
17341  auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
17342  auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
17343  N->getOperand(1), Op2, N->getOperand(3));
17344  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17345  return;
17346  }
17347  case Intrinsic::aarch64_sve_lasta: {
17348  SDLoc DL(N);
17349  auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
17350  N->getOperand(1), N->getOperand(2));
17351  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17352  return;
17353  }
17354  case Intrinsic::aarch64_sve_lastb: {
17355  SDLoc DL(N);
17356  auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
17357  N->getOperand(1), N->getOperand(2));
17358  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17359  return;
17360  }
17361  }
17362  }
17363  }
17364 }
17365 
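// On Android and Fuchsia the stack cookie lives in a fixed TLS slot (see
// getIRStackGuard below), so the generic LOAD_STACK_GUARD node is not used
// on those targets.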
17366 bool AArch64TargetLowering::useLoadStackGuardNode() const {
17367  if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
17368  return TargetLowering::useLoadStackGuardNode();
17369  return true;
17370 }
17371 
17372 unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
17373  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
17374  // reciprocal if there are three or more FDIVs.
17375  return 3;
17376 }
17377 
17378 TargetLoweringBase::LegalizeTypeAction
17379 AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
17380  // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
17381  // v4i16, v2i32 instead of to promote.
17382  if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
17383  VT == MVT::v1f32)
17384  return TypeWidenVector;
17385 
17386  return TargetLoweringBase::getPreferredVectorAction(VT);
17387 }
17388 
17389 // Loads and stores less than 128-bits are already atomic; ones above that
17390 // are doomed anyway, so defer to the default libcall and blame the OS when
17391 // things go wrong.
17392 bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
17393  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
17394  return Size == 128;
17395 }
17396 
17397 // Loads and stores less than 128-bits are already atomic; ones above that
17398 // are doomed anyway, so defer to the default libcall and blame the OS when
17399 // things go wrong.
17400 TargetLowering::AtomicExpansionKind
17401 AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
17402  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
17403  return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
17404 }
17405 
17406 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
17407 TargetLowering::AtomicExpansionKind
17408 AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
17409  if (AI->isFloatingPointOperation())
17410  return AtomicExpansionKind::CmpXChg;
17411 
17412  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
17413  if (Size > 128) return AtomicExpansionKind::None;
17414 
17415  // Nand is not supported in LSE.
17416  // Leave 128 bits to LLSC or CmpXChg.
17417  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
17418  if (Subtarget->hasLSE())
17419  return AtomicExpansionKind::None;
17420  if (Subtarget->outlineAtomics()) {
17421  // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
17422  // Don't outline them unless
17423  // (1) high level <atomic> support approved:
17424  // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
17425  // (2) low level libgcc and compiler-rt support implemented by:
17426  // min/max outline atomics helpers
17427  if (AI->getOperation() != AtomicRMWInst::Min &&
17428  AI->getOperation() != AtomicRMWInst::Max &&
17429  AI->getOperation() != AtomicRMWInst::UMin &&
17430  AI->getOperation() != AtomicRMWInst::UMax) {
17431  return AtomicExpansionKind::None;
17432  }
17433  }
17434  }
17435 
17436  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
17437  // implement atomicrmw without spilling. If the target address is also on the
17438  // stack and close enough to the spill slot, this can lead to a situation
17439  // where the monitor always gets cleared and the atomic operation can never
17440  // succeed. So at -O0 lower this operation to a CAS loop.
17441  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
17442  return AtomicExpansionKind::CmpXChg;
17443 
17444  return AtomicExpansionKind::LLSC;
17445 }
17446 
17447 TargetLowering::AtomicExpansionKind
17448 AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
17449  AtomicCmpXchgInst *AI) const {
17450  // If subtarget has LSE, leave cmpxchg intact for codegen.
17451  if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
17452  return AtomicExpansionKind::None;
17453  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
17454  // implement cmpxchg without spilling. If the address being exchanged is also
17455  // on the stack and close enough to the spill slot, this can lead to a
17456  // situation where the monitor always gets cleared and the atomic operation
17457  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
17458  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
17459  return AtomicExpansionKind::None;
17460 
17461  // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
17462  // it.
17463  unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
17464  if (Size > 64)
17465  return AtomicExpansionKind::None;
17466 
17467  return AtomicExpansionKind::LLSC;
17468 }
17469 
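// emitLoadLinked emits the load-exclusive half of an LL/SC sequence: ldaxr
// or ldxr depending on acquire semantics, or the paired ldaxp/ldxp form for
// i128, whose two i64 results are recombined into one value below.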
17470 Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
17471  Type *ValueTy, Value *Addr,
17472  AtomicOrdering Ord) const {
17473  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17474  bool IsAcquire = isAcquireOrStronger(Ord);
17475 
17476  // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
17477  // intrinsic must return {i64, i64} and we have to recombine them into a
17478  // single i128 here.
17479  if (ValueTy->getPrimitiveSizeInBits() == 128) {
17480  Intrinsic::ID Int =
17481  IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
17482  Function *Ldxr = Intrinsic::getDeclaration(M, Int);
17483 
17484  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
17485  Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
17486 
17487  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
17488  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
17489  Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
17490  Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
17491  return Builder.CreateOr(
17492  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
17493  }
17494 
17495  Type *Tys[] = { Addr->getType() };
17496  Intrinsic::ID Int =
17497  IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
17498  Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
17499 
17500  const DataLayout &DL = M->getDataLayout();
17501  IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
17502  Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
17503 
17504  return Builder.CreateBitCast(Trunc, ValueTy);
17505 }
17506 
17507 void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
17508  IRBuilderBase &Builder) const {
17509  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17510  Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
17511 }
17512 
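// emitStoreConditional emits the matching store-exclusive: stlxr or stxr
// (stlxp/stxp for i128). The returned i32 status is 0 on success and 1 if
// the exclusive monitor was lost and the store must be retried.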
17513 Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
17514  Value *Val, Value *Addr,
17515  AtomicOrdering Ord) const {
17516  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17517  bool IsRelease = isReleaseOrStronger(Ord);
17518 
17519  // Since the intrinsics must have legal type, the i128 intrinsics take two
17520  // parameters: "i64, i64". We must marshal Val into the appropriate form
17521  // before the call.
17522  if (Val->getType()->getPrimitiveSizeInBits() == 128) {
17523  Intrinsic::ID Int =
17524  IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
17525  Function *Stxr = Intrinsic::getDeclaration(M, Int);
17526  Type *Int64Ty = Type::getInt64Ty(M->getContext());
17527 
17528  Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
17529  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
17530  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
17531  return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
17532  }
17533 
17534  Intrinsic::ID Int =
17535  IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
17536  Type *Tys[] = { Addr->getType() };
17537  Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
17538 
17539  const DataLayout &DL = M->getDataLayout();
17540  IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
17541  Val = Builder.CreateBitCast(Val, IntValTy);
17542 
17543  return Builder.CreateCall(Stxr,
17544  {Builder.CreateZExtOrBitCast(
17545  Val, Stxr->getFunctionType()->getParamType(0)),
17546  Addr});
17547 }
17548 
17549 bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
17550  Type *Ty, CallingConv::ID CallConv, bool isVarArg,
17551  const DataLayout &DL) const {
17552  if (!Ty->isArrayTy()) {
17553  const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
17554  return TySize.isScalable() && TySize.getKnownMinSize() > 128;
17555  }
17556 
17557  // All non-aggregate members of the type must have the same type.
17558  SmallVector<EVT> ValueVTs;
17559  ComputeValueVTs(*this, DL, Ty, ValueVTs);
17560  return is_splat(ValueVTs);
17561 }
17562 
17563 bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
17564  EVT) const {
17565  return false;
17566 }
17567 
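// UseTlsOffset forms a pointer at a fixed byte offset from the thread
// pointer (TPIDR_EL0, via llvm.thread_pointer); the Android and Fuchsia
// stack-guard and SafeStack slots below are addressed this way.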
17568 static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) {
17569  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
17570  Function *ThreadPointerFunc =
17571  Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
17572  return IRB.CreatePointerCast(
17573  IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
17574  Offset),
17575  IRB.getInt8PtrTy()->getPointerTo(0));
17576 }
17577 
17578 Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
17579  // Android provides a fixed TLS slot for the stack cookie. See the definition
17580  // of TLS_SLOT_STACK_GUARD in
17581  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
17582  if (Subtarget->isTargetAndroid())
17583  return UseTlsOffset(IRB, 0x28);
17584 
17585  // Fuchsia is similar.
17586  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
17587  if (Subtarget->isTargetFuchsia())
17588  return UseTlsOffset(IRB, -0x10);
17589 
17590  return TargetLowering::getIRStackGuard(IRB);
17591 }
17592 
17593 void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
17594  // MSVC CRT provides functionalities for stack protection.
17595  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
17596  // MSVC CRT has a global variable holding security cookie.
17597  M.getOrInsertGlobal("__security_cookie",
17598  Type::getInt8PtrTy(M.getContext()));
17599 
17600  // MSVC CRT has a function to validate security cookie.
17601  FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
17602  "__security_check_cookie", Type::getVoidTy(M.getContext()),
17603  Type::getInt8PtrTy(M.getContext()));
17604  if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
17605  F->setCallingConv(CallingConv::Win64);
17606  F->addAttribute(1, Attribute::AttrKind::InReg);
17607  }
17608  return;
17609  }
17610  TargetLowering::insertSSPDeclarations(M);
17611 }
17612 
17613 Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
17614  // MSVC CRT has a global variable holding security cookie.
17615  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
17616  return M.getGlobalVariable("__security_cookie");
17617  return TargetLowering::getSDagStackGuard(M);
17618 }
17619 
17620 Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
17621  // MSVC CRT has a function to validate security cookie.
17622  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
17623  return M.getFunction("__security_check_cookie");
17624  return TargetLowering::getSSPStackGuardCheck(M);
17625 }
17626 
17627 Value *
17628 AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
17629  // Android provides a fixed TLS slot for the SafeStack pointer. See the
17630  // definition of TLS_SLOT_SAFESTACK in
17631  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
17632  if (Subtarget->isTargetAndroid())
17633  return UseTlsOffset(IRB, 0x48);
17634 
17635  // Fuchsia is similar.
17636  // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
17637  if (Subtarget->isTargetFuchsia())
17638  return UseTlsOffset(IRB, -0x8);
17639 
17640  return TargetLowering::getSafeStackPointerLocation(IRB);
17641 }
17642 
17643 bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
17644  const Instruction &AndI) const {
17645  // Only sink 'and' mask to cmp use block if it is masking a single bit, since
17646  // this is likely to fold the and/cmp/br into a single tbz instruction. It
17647  // may be beneficial to sink in other cases, but we would have to check that
17648  // the cmp would not get folded into the br to form a cbz for these to be
17649  // beneficial.
17650  ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
17651  if (!Mask)
17652  return false;
17653  return Mask->getValue().isPowerOf2();
17654 }
17655 
17656 bool AArch64TargetLowering::
17657  shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
17658  SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
17659  unsigned OldShiftOpcode, unsigned NewShiftOpcode,
17660  SelectionDAG &DAG) const {
17661  // Does baseline recommend not to perform the fold by default?
17662  if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
17663  X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
17664  return false;
17665  // Else, if this is a vector shift, prefer 'shl'.
17666  return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
17667 }
17668 
17669 bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG,
17670  SDNode *N) const {
17671  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
17672  !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
17673  return false;
17674  return true;
17675 }
17676 
17677 void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
17678  // Update IsSplitCSR in AArch64FunctionInfo.
17679  AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
17680  AFI->setIsSplitCSR(true);
17681 }
17682 
17683 void AArch64TargetLowering::insertCopiesSplitCSR(
17684  MachineBasicBlock *Entry,
17685  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
17686  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
17687  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
17688  if (!IStart)
17689  return;
17690 
17691  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
17692  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
17693  MachineBasicBlock::iterator MBBI = Entry->begin();
17694  for (const MCPhysReg *I = IStart; *I; ++I) {
17695  const TargetRegisterClass *RC = nullptr;
17696  if (AArch64::GPR64RegClass.contains(*I))
17697  RC = &AArch64::GPR64RegClass;
17698  else if (AArch64::FPR64RegClass.contains(*I))
17699  RC = &AArch64::FPR64RegClass;
17700  else
17701  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
17702 
17703  Register NewVR = MRI->createVirtualRegister(RC);
17704  // Create copy from CSR to a virtual register.
17705  // FIXME: this currently does not emit CFI pseudo-instructions, it works
17706  // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
17707  // nounwind. If we want to generalize this later, we may need to emit
17708  // CFI pseudo-instructions.
17709  assert(Entry->getParent()->getFunction().hasFnAttribute(
17710  Attribute::NoUnwind) &&
17711  "Function should be nounwind in insertCopiesSplitCSR!");
17712  Entry->addLiveIn(*I);
17713  BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
17714  .addReg(*I);
17715 
17716  // Insert the copy-back instructions right before the terminator.
17717  for (auto *Exit : Exits)
17718  BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
17719  TII->get(TargetOpcode::COPY), *I)
17720  .addReg(NewVR);
17721  }
17722 }
17723 
17724 bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
17725  // Integer division on AArch64 is expensive. However, when aggressively
17726  // optimizing for code size, we prefer to use a div instruction, as it is
17727  // usually smaller than the alternative sequence.
17728  // The exception to this is vector division. Since AArch64 doesn't have vector
17729  // integer division, leaving the division as-is is a loss even in terms of
17730  // size, because it will have to be scalarized, while the alternative code
17731  // sequence can be performed in vector form.
17732  bool OptSize = Attr.hasFnAttribute(Attribute::MinSize);
17733  return OptSize && !VT.isVector();
17734 }
17735 
17736 bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
17737  // We want inc-of-add for scalars and sub-of-not for vectors.
17738  return VT.isScalarInteger();
17739 }
17740 
17741 bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
17742  return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
17743 }
17744 
17745 unsigned
17746 AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
17747  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
17748  return getPointerTy(DL).getSizeInBits();
17749 
17750  return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
17751 }
17752 
17753 void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
17754  MF.getFrameInfo().computeMaxCallFrameSize(MF);
17755  TargetLoweringBase::finalizeLowering(MF);
17756 }
17757 
17758 // Unlike X86, we let frame lowering assign offsets to all catch objects.
17759 bool AArch64TargetLowering::needsFixedCatchObjects() const {
17760  return false;
17761 }
17762 
17763 bool AArch64TargetLowering::shouldLocalize(
17764  const MachineInstr &MI, const TargetTransformInfo *TTI) const {
17765  switch (MI.getOpcode()) {
17766  case TargetOpcode::G_GLOBAL_VALUE: {
17767  // On Darwin, TLS global vars get selected into function calls, which
17768  // we don't want localized, as they can get moved into the middle of
17769  // another call sequence.
17770  const GlobalValue &GV = *MI.getOperand(1).getGlobal();
17771  if (GV.isThreadLocal() && Subtarget->isTargetMachO())
17772  return false;
17773  break;
17774  }
17775  // If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
17776  // localizable.
17777  case AArch64::ADRP:
17778  case AArch64::G_ADD_LOW:
17779  return true;
17780  default:
17781  break;
17782  }
17783  return TargetLoweringBase::shouldLocalize(MI, TTI);
17784 }
17785 
17786 bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
17787  if (isa<ScalableVectorType>(Inst.getType()))
17788  return true;
17789 
17790  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
17791  if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
17792  return true;
17793 
17794  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
17795  if (isa<ScalableVectorType>(AI->getAllocatedType()))
17796  return true;
17797  }
17798 
17799  return false;
17800 }
17801 
17802 // Return the largest legal scalable vector type that matches VT's element type.
17803 static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
17804  assert(VT.isFixedLengthVector() &&
17805  DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
17806  "Expected legal fixed length vector!");
17807  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
17808  default:
17809  llvm_unreachable("unexpected element type for SVE container");
17810  case MVT::i8:
17811  return EVT(MVT::nxv16i8);
17812  case MVT::i16:
17813  return EVT(MVT::nxv8i16);
17814  case MVT::i32:
17815  return EVT(MVT::nxv4i32);
17816  case MVT::i64:
17817  return EVT(MVT::nxv2i64);
17818  case MVT::f16:
17819  return EVT(MVT::nxv8f16);
17820  case MVT::f32:
17821  return EVT(MVT::nxv4f32);
17822  case MVT::f64:
17823  return EVT(MVT::nxv2f64);
17824  }
17825 }
17826 
17827 // Return a PTRUE with active lanes corresponding to the extent of VT.
17828 static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
17829  EVT VT) {
17830  assert(VT.isFixedLengthVector() &&
17831  DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
17832  "Expected legal fixed length vector!");
17833 
17834  int PgPattern;
17835  switch (VT.getVectorNumElements()) {
17836  default:
17837  llvm_unreachable("unexpected element count for SVE predicate");
17838  case 1:
17839  PgPattern = AArch64SVEPredPattern::vl1;
17840  break;
17841  case 2:
17842  PgPattern = AArch64SVEPredPattern::vl2;
17843  break;
17844  case 4:
17845  PgPattern = AArch64SVEPredPattern::vl4;
17846  break;
17847  case 8:
17848  PgPattern = AArch64SVEPredPattern::vl8;
17849  break;
17850  case 16:
17851  PgPattern = AArch64SVEPredPattern::vl16;
17852  break;
17853  case 32:
17854  PgPattern = AArch64SVEPredPattern::vl32;
17855  break;
17856  case 64:
17857  PgPattern = AArch64SVEPredPattern::vl64;
17858  break;
17859  case 128:
17860  PgPattern = AArch64SVEPredPattern::vl128;
17861  break;
17862  case 256:
17863  PgPattern = AArch64SVEPredPattern::vl256;
17864  break;
17865  }
17866 
17867  // TODO: For vectors that are exactly getMaxSVEVectorSizeInBits big, we can
17868  // use AArch64SVEPredPattern::all, which can enable the use of unpredicated
17869  // variants of instructions when available.
17870 
17871  MVT MaskVT;
17872  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
17873  default:
17874  llvm_unreachable("unexpected element type for SVE predicate");
17875  case MVT::i8:
17876  MaskVT = MVT::nxv16i1;
17877  break;
17878  case MVT::i16:
17879  case MVT::f16:
17880  MaskVT = MVT::nxv8i1;
17881  break;
17882  case MVT::i32:
17883  case MVT::f32:
17884  MaskVT = MVT::nxv4i1;
17885  break;
17886  case MVT::i64:
17887  case MVT::f64:
17888  MaskVT = MVT::nxv2i1;
17889  break;
17890  }
17891 
17892  return DAG.getNode(AArch64ISD::PTRUE, DL, MaskVT,
17893  DAG.getTargetConstant(PgPattern, DL, MVT::i64));
17894 }
17895 
17896 static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
17897  EVT VT) {
17898  assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
17899  "Expected legal scalable vector!");
17900  auto PredTy = VT.changeVectorElementType(MVT::i1);
17901  return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
17902 }
17903 
17904 static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT) {
17905  if (VT.isFixedLengthVector())
17906  return getPredicateForFixedLengthVector(DAG, DL, VT);
17907 
17908  return getPredicateForScalableVector(DAG, DL, VT);
17909 }
17910 
17911 // Grow V to consume an entire SVE register.
17912 static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
17913  assert(VT.isScalableVector() &&
17914  "Expected to convert into a scalable vector!");
17915  assert(V.getValueType().isFixedLengthVector() &&
17916  "Expected a fixed length vector operand!");
17917  SDLoc DL(V);
17918  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
17919  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
17920 }
17921 
17922 // Shrink V so it's just big enough to maintain a VT's worth of data.
17923 static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
17924  assert(VT.isFixedLengthVector() &&
17925  "Expected to convert into a fixed length vector!");
17926  assert(V.getValueType().isScalableVector() &&
17927  "Expected a scalable vector operand!");
17928  SDLoc DL(V);
17929  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
17930  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
17931 }
17932 
17933 // Convert all fixed length vector loads larger than NEON to masked_loads.
17934 SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
17935  SDValue Op, SelectionDAG &DAG) const {
17936  auto Load = cast<LoadSDNode>(Op);
17937 
17938  SDLoc DL(Op);
17939  EVT VT = Op.getValueType();
17940  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
17941 
17942  auto NewLoad = DAG.getMaskedLoad(
17943  ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
17944  getPredicateForFixedLengthVector(DAG, DL, VT), DAG.getUNDEF(ContainerVT),
17945  Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(),
17946  Load->getExtensionType());
17947 
17948  auto Result = convertFromScalableVector(DAG, VT, NewLoad);
17949  SDValue MergedValues[2] = {Result, Load->getChain()};
17950  return DAG.getMergeValues(MergedValues, DL);
17951 }
17952 
17953 static SDValue convertFixedMaskToScalableVector(SDValue Mask,
17954  SelectionDAG &DAG) {
17955  SDLoc DL(Mask);
17956  EVT InVT = Mask.getValueType();
17957  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
17958 
17959  auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
17960  auto Op2 = DAG.getConstant(0, DL, ContainerVT);
17961  auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
17962 
17963  EVT CmpVT = Pg.getValueType();
17964  return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
17965  {Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
17966 }
17967 
17968 // Convert all fixed length vector loads larger than NEON to masked_loads.
17969 SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
17970  SDValue Op, SelectionDAG &DAG) const {
17971  auto Load = cast<MaskedLoadSDNode>(Op);
17972 
17973  if (Load->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD)
17974  return SDValue();
17975 
17976  SDLoc DL(Op);
17977  EVT VT = Op.getValueType();
17978  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
17979 
17980  SDValue Mask = convertFixedMaskToScalableVector(Load->getMask(), DAG);
17981 
17982  SDValue PassThru;
17983  bool IsPassThruZeroOrUndef = false;
17984 
17985  if (Load->getPassThru()->isUndef()) {
17986  PassThru = DAG.getUNDEF(ContainerVT);
17987  IsPassThruZeroOrUndef = true;
17988  } else {
17989  if (ContainerVT.isInteger())
17990  PassThru = DAG.getConstant(0, DL, ContainerVT);
17991  else
17992  PassThru = DAG.getConstantFP(0, DL, ContainerVT);
17993  if (isZerosVector(Load->getPassThru().getNode()))
17994  IsPassThruZeroOrUndef = true;
17995  }
17996 
17997  auto NewLoad = DAG.getMaskedLoad(
17998  ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
17999  Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
18000  Load->getAddressingMode(), Load->getExtensionType());
18001 
18002  if (!IsPassThruZeroOrUndef) {
18003  SDValue OldPassThru =
18004  convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
18005  NewLoad = DAG.getSelect(DL, ContainerVT, Mask, NewLoad, OldPassThru);
18006  }
18007 
18008  auto Result = convertFromScalableVector(DAG, VT, NewLoad);
18009  SDValue MergedValues[2] = {Result, Load->getChain()};
18010  return DAG.getMergeValues(MergedValues, DL);
18011 }
18012 
18013 // Convert all fixed length vector stores larger than NEON to masked_stores.
18014 SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
18015  SDValue Op, SelectionDAG &DAG) const {
18016  auto Store = cast<StoreSDNode>(Op);
18017 
18018  SDLoc DL(Op);
18019  EVT VT = Store->getValue().getValueType();
18020  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18021 
18022  auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
18023  return DAG.getMaskedStore(
18024  Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
18025  getPredicateForFixedLengthVector(DAG, DL, VT), Store->getMemoryVT(),
18026  Store->getMemOperand(), Store->getAddressingMode(),
18027  Store->isTruncatingStore());
18028 }
18029 
18030 SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
18031  SDValue Op, SelectionDAG &DAG) const {
18032  auto Store = cast<MaskedStoreSDNode>(Op);
18033 
18034  if (Store->isTruncatingStore())
18035  return SDValue();
18036 
18037  SDLoc DL(Op);
18038  EVT VT = Store->getValue().getValueType();
18039  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18040 
18041  auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
18042  SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
18043 
18044  return DAG.getMaskedStore(
18045  Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
18046  Mask, Store->getMemoryVT(), Store->getMemOperand(),
18047  Store->getAddressingMode(), Store->isTruncatingStore());
18048 }
18049 
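// SVE only has 32- and 64-bit integer DIV, so fixed-length i8/i16 divides
// are widened: directly when the widened fixed type is legal, otherwise by
// unpacking to the wider container, dividing each half, and narrowing the
// results back together with UZP1.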
18050 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
18051  SDValue Op, SelectionDAG &DAG) const {
18052  SDLoc dl(Op);
18053  EVT VT = Op.getValueType();
18054  EVT EltVT = VT.getVectorElementType();
18055 
18056  bool Signed = Op.getOpcode() == ISD::SDIV;
18057  unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
18058 
18059  // Scalable vector i32/i64 DIV is supported.
18060  if (EltVT == MVT::i32 || EltVT == MVT::i64)
18061  return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
18062 
18063  // Scalable vector i8/i16 DIV is not supported. Promote it to i32.
18064  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18065  EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18066  EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
18067  EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
18068 
18069  // If this is not a full vector, extend, div, and truncate it.
18070  EVT WidenedVT = VT.widenIntegerVectorElementType(*DAG.getContext());
18071  if (DAG.getTargetLoweringInfo().isTypeLegal(WidenedVT)) {
18072  unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
18073  SDValue Op0 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(0));
18074  SDValue Op1 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(1));
18075  SDValue Div = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0, Op1);
18076  return DAG.getNode(ISD::TRUNCATE, dl, VT, Div);
18077  }
18078 
18079  // Convert the operands to scalable vectors.
18080  SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
18081  SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
18082 
18083  // Extend the scalable operands.
18084  unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
18085  unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
18086  SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
18087  SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
18088  SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
18089  SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
18090 
18091  // Convert back to fixed vectors so the DIV can be further lowered.
18092  Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
18093  Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
18094  Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
18095  Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
18096  SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
18097  Op0Lo, Op1Lo);
18098  SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
18099  Op0Hi, Op1Hi);
18100 
18101  // Convert again to scalable vectors to truncate.
18102  ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
18103  ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
18104  SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
18105  ResultLo, ResultHi);
18106 
18107  return convertFromScalableVector(DAG, VT, ScalableResult);
18108 }
18109 
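// Fixed-length extends are lowered with the SVE unpack instructions: each
// [S|U]UNPKLO step doubles the element width, so the switch below falls
// through successive container types until the destination element type is
// reached.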
18110 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
18111  SDValue Op, SelectionDAG &DAG) const {
18112  EVT VT = Op.getValueType();
18113  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18114 
18115  SDLoc DL(Op);
18116  SDValue Val = Op.getOperand(0);
18117  EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
18118  Val = convertToScalableVector(DAG, ContainerVT, Val);
18119 
18120  bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
18121  unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
18122 
18123  // Repeatedly unpack Val until the result is of the desired element type.
18124  switch (ContainerVT.getSimpleVT().SimpleTy) {
18125  default:
18126  llvm_unreachable("unimplemented container type");
18127  case MVT::nxv16i8:
18128  Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
18129  if (VT.getVectorElementType() == MVT::i16)
18130  break;
18131  LLVM_FALLTHROUGH;
18132  case MVT::nxv8i16:
18133  Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
18134  if (VT.getVectorElementType() == MVT::i32)
18135  break;
18136  LLVM_FALLTHROUGH;
18137  case MVT::nxv4i32:
18138  Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
18139  assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
18140  break;
18141  }
18142 
18143  return convertFromScalableVector(DAG, VT, Val);
18144 }
18145 
18146 SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
18147  SDValue Op, SelectionDAG &DAG) const {
18148  EVT VT = Op.getValueType();
18149  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18150 
18151  SDLoc DL(Op);
18152  SDValue Val = Op.getOperand(0);
18153  EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
18154  Val = convertToScalableVector(DAG, ContainerVT, Val);
18155 
18156  // Repeatedly truncate Val until the result is of the desired element type.
18157  switch (ContainerVT.getSimpleVT().SimpleTy) {
18158  default:
18159  llvm_unreachable("unimplemented container type");
18160  case MVT::nxv2i64:
18161  Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
18162  Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
18163  if (VT.getVectorElementType() == MVT::i32)
18164  break;
18165  LLVM_FALLTHROUGH;
18166  case MVT::nxv4i32:
18167  Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
18168  Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
18169  if (VT.getVectorElementType() == MVT::i16)
18170  break;
18171  LLVM_FALLTHROUGH;
18172  case MVT::nxv8i16:
18173  Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
18174  Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
18175  assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
18176  break;
18177  }
18178 
18179  return convertFromScalableVector(DAG, VT, Val);
18180 }
18181 
18182 SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
18183  SDValue Op, SelectionDAG &DAG) const {
18184  EVT VT = Op.getValueType();
18185  EVT InVT = Op.getOperand(0).getValueType();
18186  assert(InVT.isFixedLengthVector() && "Expected fixed length vector type!");
18187 
18188  SDLoc DL(Op);
18189  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18190  SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
18191 
18192  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Op.getOperand(1));
18193 }
18194 
18195 SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
18196  SDValue Op, SelectionDAG &DAG) const {
18197  EVT VT = Op.getValueType();
18198  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18199 
18200  SDLoc DL(Op);
18201  EVT InVT = Op.getOperand(0).getValueType();
18202  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18203  SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
18204 
18205  auto ScalableRes = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Op0,
18206  Op.getOperand(1), Op.getOperand(2));
18207 
18208  return convertFromScalableVector(DAG, VT, ScalableRes);
18209 }
18210 
18211 // Convert vector operation 'Op' to an equivalent predicated operation whereby
18212 // the original operation's type is used to construct a suitable predicate.
18213 // NOTE: The results for inactive lanes are undefined.
18214 SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
18215  SelectionDAG &DAG,
18216  unsigned NewOp,
18217  bool OverrideNEON) const {
18218  EVT VT = Op.getValueType();
18219  SDLoc DL(Op);
18220  auto Pg = getPredicateForVector(DAG, DL, VT);
18221 
18222  if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
18223  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18224 
18225  // Create list of operands by converting existing ones to scalable types.
18226  SmallVector<SDValue, 4> Operands = {Pg};
18227  for (const SDValue &V : Op->op_values()) {
18228  if (isa<CondCodeSDNode>(V)) {
18229  Operands.push_back(V);
18230  continue;
18231  }
18232 
18233  if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
18234  EVT VTArg = VTNode->getVT().getVectorElementType();
18235  EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
18236  Operands.push_back(DAG.getValueType(NewVTArg));
18237  continue;
18238  }
18239 
18240  assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
18241  "Only fixed length vectors are supported!");
18242  Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
18243  }
18244 
18245  if (isMergePassthruOpcode(NewOp))
18246  Operands.push_back(DAG.getUNDEF(ContainerVT));
18247 
18248  auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
18249  return convertFromScalableVector(DAG, VT, ScalableRes);
18250  }
18251 
18252  assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
18253 
18254  SmallVector<SDValue, 4> Operands = {Pg};
18255  for (const SDValue &V : Op->op_values()) {
18256  assert((!V.getValueType().isVector() ||
18257  V.getValueType().isScalableVector()) &&
18258  "Only scalable vectors are supported!");
18259  Operands.push_back(V);
18260  }
18261 
18262  if (isMergePassthruOpcode(NewOp))
18263  Operands.push_back(DAG.getUNDEF(VT));
18264 
18265  return DAG.getNode(NewOp, DL, VT, Operands);
18266 }
18267 
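// Editor's sketch (not part of the LLVM sources): the lane contract of the
// predicated node built above. Active lanes compute the operation; inactive
// lanes are undefined, which is harmless here because callers only read back
// the lanes that held the original fixed-length vector. Hypothetical model:
#include <vector>

static std::vector<int> predicatedAddModel(const std::vector<bool> &Pg,
                                           const std::vector<int> &A,
                                           const std::vector<int> &B) {
  std::vector<int> R(A.size());
  for (size_t I = 0; I < A.size(); ++I)
    R[I] = Pg[I] ? A[I] + B[I] : 0xBAD; // inactive lanes: garbage by contract
  return R;
}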
18268 // If a fixed length vector operation has no side effects when applied to
18269 // undefined elements, we can safely use scalable vectors to perform the same
18270 // operation without needing to worry about predication.
18271 SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
18272  SelectionDAG &DAG) const {
18273  EVT VT = Op.getValueType();
18274  assert(useSVEForFixedLengthVectorVT(VT) &&
18275  "Only expected to lower fixed length vector operation!");
18276  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18277 
18278  // Create list of operands by converting existing ones to scalable types.
18279  SmallVector<SDValue, 4> Ops;
18280  for (const SDValue &V : Op->op_values()) {
18281  assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
18282 
18283  // Pass through non-vector operands.
18284  if (!V.getValueType().isVector()) {
18285  Ops.push_back(V);
18286  continue;
18287  }
18288 
18289  // "cast" fixed length vector to a scalable vector.
18290  assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
18291  "Only fixed length vectors are supported!");
18292  Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
18293  }
18294 
18295  auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
18296  return convertFromScalableVector(DAG, VT, ScalableRes);
18297 }
18298 
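// Editor's sketch (not part of the LLVM sources): the convert/operate/convert
// round trip used above, modelled on plain arrays. The fixed-length data
// lands in the low lanes of a larger container, the unpredicated op runs on
// every lane (harmless by this function's precondition), and only the low
// lanes are read back. Assumes ContainerLanes >= Fixed.size(); names are
// hypothetical.
#include <vector>

static std::vector<int> scalableRoundTrip(const std::vector<int> &Fixed,
                                          size_t ContainerLanes) {
  std::vector<int> Container(ContainerLanes, 0); // convertToScalableVector
  for (size_t I = 0; I < Fixed.size(); ++I)
    Container[I] = Fixed[I];
  for (int &Lane : Container) // whole-register op; junk lanes don't matter
    Lane *= 2;
  return std::vector<int>(Container.begin(), // convertFromScalableVector
                          Container.begin() + Fixed.size());
}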
18299 SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
18300  SelectionDAG &DAG) const {
18301  SDLoc DL(ScalarOp);
18302  SDValue AccOp = ScalarOp.getOperand(0);
18303  SDValue VecOp = ScalarOp.getOperand(1);
18304  EVT SrcVT = VecOp.getValueType();
18305  EVT ResVT = SrcVT.getVectorElementType();
18306 
18307  EVT ContainerVT = SrcVT;
18308  if (SrcVT.isFixedLengthVector()) {
18309  ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
18310  VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
18311  }
18312 
18313  SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
18314  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
18315 
18316  // Convert operands to Scalable.
18317  AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
18318  DAG.getUNDEF(ContainerVT), AccOp, Zero);
18319 
18320  // Perform reduction.
18321  SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
18322  Pg, AccOp, VecOp);
18323 
18324  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
18325 }
18326 
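// Editor's sketch (not part of the LLVM sources): FADDA_PRED performs a
// strictly ordered reduction, which is exactly what VECREDUCE_SEQ_FADD
// requires -- the sum must not be reassociated into a tree because FP
// rounding makes the two evaluation orders differ. Hypothetical scalar model:
static float faddaModel(float Acc, const float *V, unsigned N) {
  for (unsigned I = 0; I < N; ++I)
    Acc = Acc + V[I]; // left to right, no reassociation
  return Acc;
}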
18327 SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
18328  SelectionDAG &DAG) const {
18329  SDLoc DL(ReduceOp);
18330  SDValue Op = ReduceOp.getOperand(0);
18331  EVT OpVT = Op.getValueType();
18332  EVT VT = ReduceOp.getValueType();
18333 
18334  if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
18335  return SDValue();
18336 
18337  SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
18338 
18339  switch (ReduceOp.getOpcode()) {
18340  default:
18341  return SDValue();
18342  case ISD::VECREDUCE_OR:
18343  return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
18344  case ISD::VECREDUCE_AND: {
18345  Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
18346  return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
18347  }
18348  case ISD::VECREDUCE_XOR: {
18349  SDValue ID =
18350  DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
18351  SDValue Cntp =
18352  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
18353  return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
18354  }
18355  }
18356 
18357  return SDValue();
18358 }
18359 
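// Editor's sketch (not part of the LLVM sources): why the three i1
// reductions above map onto PTEST and CNTP. OR asks "is any lane set"; AND
// asks "is no lane clear", hence the XOR with the all-true predicate before
// testing for NONE_ACTIVE; and XOR over i1 lanes is the parity of the set
// count that CNTP returns. Hypothetical scalar models:
#include <vector>

static bool reduceOrModel(const std::vector<bool> &M) {
  for (bool B : M)
    if (B) return true; // PTEST ANY_ACTIVE
  return false;
}

static bool reduceAndModel(const std::vector<bool> &M) {
  for (bool B : M)
    if (!B) return false; // PTEST NONE_ACTIVE on (M ^ all-true)
  return true;
}

static bool reduceXorModel(const std::vector<bool> &M) {
  unsigned Count = 0;
  for (bool B : M)
    Count += B;       // CNTP
  return Count & 1;   // low bit of the count == XOR across the lanes
}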
18360 SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
18361  SDValue ScalarOp,
18362  SelectionDAG &DAG) const {
18363  SDLoc DL(ScalarOp);
18364  SDValue VecOp = ScalarOp.getOperand(0);
18365  EVT SrcVT = VecOp.getValueType();
18366 
18367  if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
18368  EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
18369  VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
18370  }
18371 
18372  // UADDV always returns an i64 result.
18373  EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
18374  SrcVT.getVectorElementType();
18375  EVT RdxVT = SrcVT;
18376  if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
18377  RdxVT = getPackedSVEVectorVT(ResVT);
18378 
18379  SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
18380  SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
18381  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
18382  Rdx, DAG.getConstant(0, DL, MVT::i64));
18383 
18384  // The VEC_REDUCE nodes expect an element-sized result.
18385  if (ResVT != ScalarOp.getValueType())
18386  Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
18387 
18388  return Res;
18389 }
18390 
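// Editor's sketch (not part of the LLVM sources): SVE's UADDV accumulates
// into a full 64-bit scalar regardless of the element type, so the code
// above extracts an i64 and truncates, matching VECREDUCE_ADD's
// wrap-to-element-width semantics. Hypothetical model for i8 elements:
#include <cstdint>

static uint8_t uaddvI8Model(const uint8_t *V, unsigned N) {
  uint64_t Acc = 0; // UADDV's i64 accumulator
  for (unsigned I = 0; I < N; ++I)
    Acc += V[I];
  return static_cast<uint8_t>(Acc); // getAnyExtOrTrunc to the scalar type
}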
18391 SDValue
18392 AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
18393  SelectionDAG &DAG) const {
18394  EVT VT = Op.getValueType();
18395  SDLoc DL(Op);
18396 
18397  EVT InVT = Op.getOperand(1).getValueType();
18398  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18399  SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
18400  SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
18401 
18402  // Convert the mask to a predicate (NOTE: We don't need to worry about
18403  // inactive lanes since VSELECT is safe when given undefined elements).
18404  EVT MaskVT = Op.getOperand(0).getValueType();
18405  EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
18406  auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
18407  Mask = DAG.getNode(ISD::TRUNCATE, DL,
18408  MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
18409 
18410  auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
18411  Mask, Op1, Op2);
18412 
18413  return convertFromScalableVector(DAG, VT, ScalableRes);
18414 }
18415 
18416 SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
18417  SDValue Op, SelectionDAG &DAG) const {
18418  SDLoc DL(Op);
18419  EVT InVT = Op.getOperand(0).getValueType();
18420  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18421 
18422  assert(useSVEForFixedLengthVectorVT(InVT) &&
18423  "Only expected to lower fixed length vector operation!");
18424  assert(Op.getValueType() == InVT.changeTypeToInteger() &&
18425  "Expected integer result of the same bit length as the inputs!");
18426 
18427  auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
18428  auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
18429  auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
18430 
18431  EVT CmpVT = Pg.getValueType();
18432  auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
18433  {Pg, Op1, Op2, Op.getOperand(2)});
18434 
18435  EVT PromoteVT = ContainerVT.changeTypeToInteger();
18436  auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
18437  return convertFromScalableVector(DAG, Op.getValueType(), Promote);
18438 }
18439 
18440 SDValue
18441 AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op,
18442  SelectionDAG &DAG) const {
18443  SDLoc DL(Op);
18444  auto SrcOp = Op.getOperand(0);
18445  EVT VT = Op.getValueType();
18446  EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
18447  EVT ContainerSrcVT =
18448  getContainerForFixedLengthVector(DAG, SrcOp.getValueType());
18449 
18450  SrcOp = convertToScalableVector(DAG, ContainerSrcVT, SrcOp);
18451  Op = DAG.getNode(ISD::BITCAST, DL, ContainerDstVT, SrcOp);
18452  return convertFromScalableVector(DAG, VT, Op);
18453 }
18454 
18455 SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
18456  SDValue Op, SelectionDAG &DAG) const {
18457  SDLoc DL(Op);
18458  unsigned NumOperands = Op->getNumOperands();
18459 
18460  assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
18461  "Unexpected number of operands in CONCAT_VECTORS");
18462 
18463  auto SrcOp1 = Op.getOperand(0);
18464  auto SrcOp2 = Op.getOperand(1);
18465  EVT VT = Op.getValueType();
18466  EVT SrcVT = SrcOp1.getValueType();
18467 
18468  if (NumOperands > 2) {
18469  SmallVector<SDValue, 4> Ops;
18470  EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
18471  for (unsigned I = 0; I < NumOperands; I += 2)
18472  Ops.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, PairVT,
18473  Op->getOperand(I), Op->getOperand(I + 1)));
18474 
18475  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
18476  }
18477 
18478  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18479 
18480  SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
18481  SrcOp1 = convertToScalableVector(DAG, ContainerVT, SrcOp1);
18482  SrcOp2 = convertToScalableVector(DAG, ContainerVT, SrcOp2);
18483 
18484  Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);
18485 
18486  return convertFromScalableVector(DAG, VT, Op);
18487 }
18488 
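// Editor's sketch (not part of the LLVM sources): why SPLICE concatenates
// here. With a predicate whose active lanes are exactly the lanes holding
// the first fixed-length operand, SPLICE copies that active region and then
// fills the rest of the register from the start of the second operand, i.e.
// Op1 followed by Op2. Hypothetical model (container length == Op1.size()):
#include <vector>

static std::vector<int> spliceModel(const std::vector<int> &Op1,
                                    const std::vector<int> &Op2,
                                    size_t ActiveLanes) {
  std::vector<int> R(Op1.begin(), Op1.begin() + ActiveLanes);
  for (size_t I = 0; R.size() < Op1.size() && I < Op2.size(); ++I)
    R.push_back(Op2[I]); // fill from the low lanes of the second operand
  return R;
}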
18489 SDValue
18490 AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
18491  SelectionDAG &DAG) const {
18492  EVT VT = Op.getValueType();
18493  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18494 
18495  SDLoc DL(Op);
18496  SDValue Val = Op.getOperand(0);
18497  SDValue Pg = getPredicateForVector(DAG, DL, VT);
18498  EVT SrcVT = Val.getValueType();
18499  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18500  EVT ExtendVT = ContainerVT.changeVectorElementType(
18501  SrcVT.getVectorElementType());
18502 
18503  Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
18504  Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val);
18505 
18506  Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val);
18507  Val = getSVESafeBitCast(ExtendVT, Val, DAG);
18508  Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
18509  Pg, Val, DAG.getUNDEF(ContainerVT));
18510 
18511  return convertFromScalableVector(DAG, VT, Val);
18512 }
18513 
18514 SDValue
18515 AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
18516  SelectionDAG &DAG) const {
18517  EVT VT = Op.getValueType();
18518  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18519 
18520  SDLoc DL(Op);
18521  SDValue Val = Op.getOperand(0);
18522  EVT SrcVT = Val.getValueType();
18523  EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
18524  EVT RoundVT = ContainerSrcVT.changeVectorElementType(
18525  VT.getVectorElementType());
18526  SDValue Pg = getPredicateForVector(DAG, DL, RoundVT);
18527 
18528  Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18529  Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val,
18530  Op.getOperand(1), DAG.getUNDEF(RoundVT));
18531  Val = getSVESafeBitCast(ContainerSrcVT.changeTypeToInteger(), Val, DAG);
18532  Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
18533 
18534  Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
18535  return DAG.getNode(ISD::BITCAST, DL, VT, Val);
18536 }
18537 
18538 SDValue
18539 AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
18540  SelectionDAG &DAG) const {
18541  EVT VT = Op.getValueType();
18542  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18543 
18544  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
18545  unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
18546  : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
18547 
18548  SDLoc DL(Op);
18549  SDValue Val = Op.getOperand(0);
18550  EVT SrcVT = Val.getValueType();
18551  EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
18552  EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
18553 
18554  if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
18555  ContainerDstVT.getVectorElementType().getSizeInBits()) {
18556  SDValue Pg = getPredicateForVector(DAG, DL, VT);
18557 
18558  Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
18559  VT.changeTypeToInteger(), Val);
18560 
18561  Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18562  Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG);
18563  // Safe to use a larger-than-specified operand since we just unpacked the
18564  // data, hence the upper bits are zero.
18565  Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
18566  DAG.getUNDEF(ContainerDstVT));
18567  return convertFromScalableVector(DAG, VT, Val);
18568  } else {
18569  EVT CvtVT = ContainerSrcVT.changeVectorElementType(
18570  ContainerDstVT.getVectorElementType());
18571  SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
18572 
18573  Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18574  Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
18575  Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
18576  Val = convertFromScalableVector(DAG, SrcVT, Val);
18577 
18578  Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
18579  return DAG.getNode(ISD::BITCAST, DL, VT, Val);
18580  }
18581 }
18582 
18583 SDValue
18584 AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
18585  SelectionDAG &DAG) const {
18586  EVT VT = Op.getValueType();
18587  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18588 
18589  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
18590  unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
18591  : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
18592 
18593  SDLoc DL(Op);
18594  SDValue Val = Op.getOperand(0);
18595  EVT SrcVT = Val.getValueType();
18596  EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
18597  EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
18598 
18599  if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
18600  ContainerDstVT.getVectorElementType().getSizeInBits()) {
18601  EVT CvtVT = ContainerDstVT.changeVectorElementType(
18602  ContainerSrcVT.getVectorElementType());
18603  SDValue Pg = getPredicateForVector(DAG, DL, VT);
18604 
18605  Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
18606  Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
18607 
18608  Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18609  Val = getSVESafeBitCast(CvtVT, Val, DAG);
18610  Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
18611  DAG.getUNDEF(ContainerDstVT));
18612  return convertFromScalableVector(DAG, VT, Val);
18613  } else {
18614  EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
18615  SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
18616 
18617  // Safe to use a larger-than-specified result since an fp_to_int where the
18618  // result doesn't fit into the destination is undefined.
18619  Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18620  Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
18621  Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
18622 
18623  return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
18624  }
18625 }
18626 
18627 SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
18628  SDValue Op, SelectionDAG &DAG) const {
18629  EVT VT = Op.getValueType();
18630  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18631 
18632  auto *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
18633  auto ShuffleMask = SVN->getMask();
18634 
18635  SDLoc DL(Op);
18636  SDValue Op1 = Op.getOperand(0);
18637  SDValue Op2 = Op.getOperand(1);
18638 
18639  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18640  Op1 = convertToScalableVector(DAG, ContainerVT, Op1);
18641  Op2 = convertToScalableVector(DAG, ContainerVT, Op2);
18642 
18643  bool ReverseEXT = false;
18644  unsigned Imm;
18645  if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
18646  Imm == VT.getVectorNumElements() - 1) {
18647  if (ReverseEXT)
18648  std::swap(Op1, Op2);
18649 
18650  EVT ScalarTy = VT.getVectorElementType();
18651  if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
18652  ScalarTy = MVT::i32;
18653  SDValue Scalar = DAG.getNode(
18654  ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1,
18655  DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
18656  Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
18657  return convertFromScalableVector(DAG, VT, Op);
18658  }
18659 
18660  return SDValue();
18661 }
18662 
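// Editor's sketch (not part of the LLVM sources): the shuffle handled above
// is a rotate-by-one (an EXT mask with Imm == NumElts - 1), and INSR
// implements it directly: every lane of Op2 moves up one position and the
// scalar -- the last element of Op1 -- lands in lane 0. Hypothetical model:
#include <vector>

static std::vector<int> insrModel(std::vector<int> Op2, int Scalar) {
  Op2.insert(Op2.begin(), Scalar); // scalar becomes lane 0, others shift up
  Op2.pop_back();                  // the top lane falls off the register
  return Op2;
}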
18663 SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
18664  SelectionDAG &DAG) const {
18665  SDLoc DL(Op);
18666  EVT InVT = Op.getValueType();
18667  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18668  (void)TLI;
18669 
18670  assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
18671  InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
18672  "Only expect to cast between legal scalable vector types!");
18673  assert((VT.getVectorElementType() == MVT::i1) ==
18674  (InVT.getVectorElementType() == MVT::i1) &&
18675  "Cannot cast between data and predicate scalable vector types!");
18676 
18677  if (InVT == VT)
18678  return Op;
18679 
18680  if (VT.getVectorElementType() == MVT::i1)
18681  return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
18682 
18683  EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
18684  EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
18685 
18686  // Pack input if required.
18687  if (InVT != PackedInVT)
18688  Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
18689 
18690  Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
18691 
18692  // Unpack result if required.
18693  if (VT != PackedVT)
18694  Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
18695 
18696  return Op;
18697 }
18698 
18699 bool AArch64TargetLowering::isAllActivePredicate(SDValue N) const {
18700  return ::isAllActivePredicate(N);
18701 }
18702 
18703 EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
18704  return ::getPromotedVTForPredicate(VT);
18705 }
18706 
18707 bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
18708  SDValue Op, const APInt &OriginalDemandedBits,
18709  const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
18710  unsigned Depth) const {
18711 
18712  unsigned Opc = Op.getOpcode();
18713  switch (Opc) {
18714  case AArch64ISD::VSHL: {
18715  // Match (VSHL (VLSHR Val X) X)
18716  SDValue ShiftL = Op;
18717  SDValue ShiftR = Op->getOperand(0);
18718  if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
18719  return false;
18720 
18721  if (!ShiftL.hasOneUse() || !ShiftR.hasOneUse())
18722  return false;
18723 
18724  unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
18725  unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
18726 
18727  // Other cases can be handled as well, but this is not
18728  // implemented.
18729  if (ShiftRBits != ShiftLBits)
18730  return false;
18731 
18732  unsigned ScalarSize = Op.getScalarValueSizeInBits();
18733  assert(ScalarSize > ShiftLBits && "Invalid shift imm");
18734 
18735  APInt ZeroBits = APInt::getLowBitsSet(ScalarSize, ShiftLBits);
18736  APInt UnusedBits = ~OriginalDemandedBits;
18737 
18738  if ((ZeroBits & UnusedBits) != ZeroBits)
18739  return false;
18740 
18741  // All bits that are zeroed by (VSHL (VLSHR Val X) X) are not
18742  // used - simplify to just Val.
18743  return TLO.CombineTo(Op, ShiftR->getOperand(0));
18744  }
18745  }
18746 
18747  return TargetLowering::SimplifyDemandedBitsForTargetNode(
18748  Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
18749 }
18750 
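// Editor's sketch (not part of the LLVM sources): the algebra behind the
// VSHL/VLSHR combine above. (x >> c) << c only clears the low c bits, i.e.
// it computes x & ~((1 << c) - 1); if no user demands any of those low bits,
// the shift pair is indistinguishable from x itself and can be removed.
// Hypothetical single-lane model (valid for c < 64):
#include <cstdint>

static uint64_t shiftPairModel(uint64_t X, unsigned C) {
  uint64_t Cleared = (X >> C) << C;          // the matched pattern
  // Identical to masking off the low C bits:
  //   Cleared == (X & ~((1ULL << C) - 1))
  return Cleared;
}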
18751 bool AArch64TargetLowering::isConstantUnsignedBitfieldExtactLegal(
18752  unsigned Opc, LLT Ty1, LLT Ty2) const {
18753  return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
18754 }
Definition: ISDOpcodes.h:466
llvm::AArch64ISD::FCMGTz
@ FCMGTz
Definition: AArch64ISelLowering.h:222
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:785
llvm::AArch64ISD::SMAXV_PRED
@ SMAXV_PRED
Definition: AArch64ISelLowering.h:255
llvm::AArch64ISD::CMLTz
@ CMLTz
Definition: AArch64ISelLowering.h:219
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3531
llvm::AArch64TargetLowering::getScratchRegisters
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
Definition: AArch64ISelLowering.cpp:12177
llvm::SelectionDAG::ReplaceAllUsesWith
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
Definition: SelectionDAG.cpp:9019
llvm::AArch64TargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
Definition: AArch64ISelLowering.cpp:1831
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
getScatterVecOpcode
unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend)
Definition: AArch64ISelLowering.cpp:4198
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2263
llvm::MVT::Glue
@ Glue
Definition: MachineValueType.h:262
llvm::AArch64ISD::FMAX_PRED
@ FMAX_PRED
Definition: AArch64ISelLowering.h:86
llvm::AArch64CC::ANY_ACTIVE
@ ANY_ACTIVE
Definition: AArch64BaseInfo.h:275
llvm::MemOp
Definition: TargetLowering.h:111
llvm::AArch64ISD::DUP
@ DUP
Definition: AArch64ISelLowering.h:152
llvm::CCValAssign::Indirect
@ Indirect
Definition: CallingConvLower.h:52
llvm::SDNode::use_iterator
This class provides iterator support for SDUse operands that use a specific SDNode.
Definition: SelectionDAGNodes.h:719
OP_VDUP3
@ OP_VDUP3
Definition: ARMISelLowering.cpp:8111
APInt.h
This file implements a class to represent arbitrary precision integral constant values and operations...
llvm::AArch64TargetLowering::getMaxSupportedInterleaveFactor
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
Definition: AArch64ISelLowering.h:586
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:231
llvm::AArch64ISD::FCVTZS_MERGE_PASSTHRU
@ FCVTZS_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:125
llvm::MaskedGatherScatterSDNode::getMask
const SDValue & getMask() const
Definition: SelectionDAGNodes.h:2463
isZeroExtended
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3653
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:34
llvm::TargetLowering::C_Memory
@ C_Memory
Definition: TargetLowering.h:4141
llvm::AArch64ISD::LDFF1_MERGE_ZERO
@ LDFF1_MERGE_ZERO
Definition: AArch64ISelLowering.h:341
OP_VUZPL
@ OP_VUZPL
Definition: ARMISelLowering.cpp:8115
performPostLD1Combine
static SDValue performPostLD1Combine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, bool IsLaneOp)
Target-specific DAG combine function for post-increment LD1 (lane) and post-increment LD1R.
Definition: AArch64ISelLowering.cpp:15109
llvm::AArch64Subtarget::getInstrInfo
const AArch64InstrInfo * getInstrInfo() const override
Definition: AArch64Subtarget.h:318
llvm::AArch64ISD::ST1x2post
@ ST1x2post
Definition: AArch64ISelLowering.h:422
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1370
llvm::AArch64TargetLowering::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition: AArch64ISelLowering.cpp:11332
llvm::EVT::isScalableVector
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:161
llvm::MVT::nxv4f16
@ nxv4f16
Definition: MachineValueType.h:230
performIntToFpCombine
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:12769
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::AArch64Subtarget::isMisaligned128StoreSlow
bool isMisaligned128StoreSlow() const
Definition: AArch64Subtarget.h:391
llvm::RTLIB::Libcall
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Definition: RuntimeLibcalls.h:30
llvm::AArch64ISD::MOVIshift
@ MOVIshift
Definition: AArch64ISelLowering.h:160
Module.h
Module.h This file contains the declarations for the Module class.
llvm::AArch64ISD::LDP
@ LDP
Definition: AArch64ISelLowering.h:442
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:715
llvm::AttributeList
Definition: Attributes.h:398
llvm::tgtok::Bits
@ Bits
Definition: TGLexer.h:50
ConstantBuildVector
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:10022
TargetInstrInfo.h
llvm::MemSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:1353
performCommonVectorExtendCombine
static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle, SelectionDAG &DAG)
Combines a dup(sext/zext) node pattern into sext/zext(dup) making use of the vector SExt/ZExt rather ...
Definition: AArch64ISelLowering.cpp:12517
GenericSetCCInfo::Opnd0
const SDValue * Opnd0
Definition: AArch64ISelLowering.cpp:13692
performANDCombine
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: AArch64ISelLowering.cpp:13240
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:7476
replaceZeroVectorStore
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of zeros to a vector store by scalar stores of WZR/XZR.
Definition: AArch64ISelLowering.cpp:14801
isSetCC
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo)
Check whether or not Op is a SET_CC operation, either a generic or an AArch64 lowered one.
Definition: AArch64ISelLowering.cpp:13723
llvm::AArch64TargetLowering::ReconstructShuffle
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const
Definition: AArch64ISelLowering.cpp:8521
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:143
getSignExtendedGatherOpcode
unsigned getSignExtendedGatherOpcode(unsigned Opcode)
Definition: AArch64ISelLowering.cpp:4221
llvm::SelectionDAG::isBaseWithConstantOffset
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
Definition: SelectionDAG.cpp:4306
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:823
getEstimate
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode, SDValue Operand, SelectionDAG &DAG, int &ExtraSteps)
Definition: AArch64ISelLowering.cpp:7995
llvm::SmallSet< unsigned, 8 >
llvm::CC_AArch64_GHC
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:702
llvm::AArch64ISD::CALL
@ CALL
Definition: AArch64ISelLowering.h:52
llvm::AArch64ISD::GLDNT1_MERGE_ZERO
@ GLDNT1_MERGE_ZERO
Definition: AArch64ISelLowering.h:388
llvm::AArch64ISD::GLDFF1_UXTW_MERGE_ZERO
@ GLDFF1_UXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:372
llvm::SelectionDAG::addCallSiteInfo
void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo)
Definition: SelectionDAG.h:1966
llvm::SelectionDAG::getSplatBuildVector
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:807
llvm::EVT::getVectorVT
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
llvm::codeview::EncodedFramePtrReg::None
@ None
llvm::AArch64TargetLowering::shouldReduceLoadWidth
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const override
Return true if it is profitable to reduce a load to a smaller type.
Definition: AArch64ISelLowering.cpp:11300
EnableAArch64ELFLocalDynamicTLSGeneration
cl::opt< bool > EnableAArch64ELFLocalDynamicTLSGeneration("aarch64-elf-ldtls-generation", cl::Hidden, cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false))
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:356
llvm::AArch64Subtarget::useSVEForFixedLengthVectors
bool useSVEForFixedLengthVectors() const
Definition: AArch64Subtarget.cpp:350
llvm::CCValAssign::BCvt
@ BCvt
Definition: CallingConvLower.h:46
llvm::TargetLoweringBase::setMinFunctionAlignment
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
Definition: TargetLowering.h:2288
getContainerForFixedLengthVector
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
Definition: AArch64ISelLowering.cpp:17803
llvm::TargetLowering::LowerCallTo
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
Definition: SelectionDAGBuilder.cpp:9407
llvm::MVT::fp_fixedlen_vector_valuetypes
static auto fp_fixedlen_vector_valuetypes()
Definition: MachineValueType.h:1440
llvm::ISD::VECREDUCE_FMAX
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1228
llvm::AArch64ISD::CALL_RVMARKER
@ CALL_RVMARKER
Definition: AArch64ISelLowering.h:56
llvm::AArch64_AM::isAdvSIMDModImmType3
static bool isAdvSIMDModImmType3(uint64_t Imm)
Definition: AArch64AddressingModes.h:481
performCSELCombine
static SDValue performCSELCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15728
llvm::AArch64ISD::CBZ
@ CBZ
Definition: AArch64ISelLowering.h:267
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition: SelectionDAGNodes.h:1246
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::AArch64ISD::FFLOOR_MERGE_PASSTHRU
@ FFLOOR_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:111
llvm::SDNodeFlags::setAllowReassociation
void setAllowReassociation(bool b)
Definition: SelectionDAGNodes.h:420
llvm::AArch64ISD::ADC
@ ADC
Definition: AArch64ISelLowering.h:76
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::AArch64TargetLowering::shouldConsiderGEPOffsetSplit
bool shouldConsiderGEPOffsetSplit() const override
Definition: AArch64ISelLowering.cpp:12119
llvm::tgtok::FalseVal
@ FalseVal
Definition: TGLexer.h:61
getPTrue
static SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, int Pattern)
Definition: AArch64ISelLowering.cpp:3824
llvm::CCValAssign::AExtUpper
@ AExtUpper
Definition: CallingConvLower.h:44
llvm::ISD::isConstantSplatVectorAllZeros
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
Definition: SelectionDAG.cpp:220
llvm::AArch64TargetLowering::EmitLoweredCatchRet
MachineBasicBlock * EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const
Definition: AArch64ISelLowering.cpp:2236
getVShiftImm
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
Definition: AArch64ISelLowering.cpp:10651
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:421
llvm::MachineFrameInfo::setAdjustsStack
void setAdjustsStack(bool V)
Definition: MachineFrameInfo.h:576
llvm::AArch64_AM::isAdvSIMDModImmType7
static bool isAdvSIMDModImmType7(uint64_t Imm)
Definition: AArch64AddressingModes.h:543
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
llvm::AArch64ISD::SMINV_PRED
@ SMINV_PRED
Definition: AArch64ISelLowering.h:257
llvm::CallBase::getNumArgOperands
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1336
performFpToIntCombine
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point multiply by power of two into floating-point to fixed-point conversion.
Definition: AArch64ISelLowering.cpp:12810
llvm::AArch64ISD::MOVImsl
@ MOVImsl
Definition: AArch64ISelLowering.h:162
llvm::AArch64ISD::GLDFF1_MERGE_ZERO
@ GLDFF1_MERGE_ZERO
Definition: AArch64ISelLowering.h:370
STLExtras.h
llvm::AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO
@ GLDFF1S_UXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:381
llvm::ISD::VAEND
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1041
performXorCombine
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:12401
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1335
llvm::ISD::VECREDUCE_SEQ_FADD
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1212
llvm::AArch64ISD::FCMGT
@ FCMGT
Definition: AArch64ISelLowering.h:212
llvm::AArch64ISD::TRN2
@ TRN2
Definition: AArch64ISelLowering.h:181
optimizeLogicalImm
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, const APInt &Demanded, TargetLowering::TargetLoweringOpt &TLO, unsigned NewOpc)
Definition: AArch64ISelLowering.cpp:1619
llvm::AArch64ISD::GLDFF1_IMM_MERGE_ZERO
@ GLDFF1_IMM_MERGE_ZERO
Definition: AArch64ISelLowering.h:376
llvm::AArch64ISD::CCMP
@ CCMP
Definition: AArch64ISelLowering.h:141
llvm::AArch64ISD::FMINNMV_PRED
@ FMINNMV_PRED
Definition: AArch64ISelLowering.h:317
llvm::SelectionDAG::getZExtOrTrunc
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
Definition: SelectionDAG.cpp:1275
llvm::AArch64ISD::URSHR_I
@ URSHR_I
Definition: AArch64ISelLowering.h:198
llvm::AArch64ISD::FRECPS
@ FRECPS
Definition: AArch64ISelLowering.h:296
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:267
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::StringRef::slice
LLVM_NODISCARD StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:732
llvm::ShuffleVectorInst::isReverseMask
static bool isReverseMask(ArrayRef< int > Mask)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
Definition: Instructions.cpp:2162
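A short sketch of the mask predicate above; the mask values are made up for illustration, and an LLVM build tree is assumed for the header.

#include "llvm/IR/Instructions.h"
#include <cassert>

using namespace llvm;

int main() {
  // Reverses a single 4-element source vector: element i comes from 3 - i.
  int Rev[] = {3, 2, 1, 0};
  assert(ShuffleVectorInst::isReverseMask(Rev));
  // Not a reversal: elements come back in source order.
  int Id[] = {0, 1, 2, 3};
  assert(!ShuffleVectorInst::isReverseMask(Id));
  return 0;
}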
llvm::AArch64ISD::GLD1S_IMM_MERGE_ZERO
@ GLD1S_IMM_MERGE_ZERO
Definition: AArch64ISelLowering.h:367
constructDup
static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT, unsigned Opcode, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:9284
llvm::ISD::SETOEQ
@ SETOEQ
Definition: ISDOpcodes.h:1358
performIntrinsicCombine
static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:14332
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:139
GenericSetCCInfo::Opnd1
const SDValue * Opnd1
Definition: AArch64ISelLowering.cpp:13693
llvm::AArch64FunctionInfo::setVarArgsStackIndex
void setVarArgsStackIndex(int Index)
Definition: AArch64MachineFunctionInfo.h:308
llvm::CallingConv::WebKit_JS
@ WebKit_JS
Definition: CallingConv.h:58
llvm::BlockAddressSDNode
Definition: SelectionDAGNodes.h:2114
llvm::AArch64ISD::REV32
@ REV32
Definition: AArch64ISelLowering.h:183
llvm::MVT::v2f64
@ v2f64
Definition: MachineValueType.h:172
llvm::CCValAssign::Trunc
@ Trunc
Definition: CallingConvLower.h:47
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:104
llvm::AArch64ISD::TLSDESC_CALLSEQ
@ TLSDESC_CALLSEQ
Definition: AArch64ISelLowering.h:60
llvm::TargetLoweringBase::emitPatchPoint
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
Definition: TargetLoweringBase.cpp:1164
SelectionDAG.h
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::SelectionDAG::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:442
llvm::MVT::nxv8i16
@ nxv8i16
Definition: MachineValueType.h:207
llvm::AArch64ISD::LD1x4post
@ LD1x4post
Definition: AArch64ISelLowering.h:421
llvm::TargetLoweringBase::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Definition: TargetLowering.h:743
llvm::TargetLoweringBase::MaxGluedStoresPerMemcpy
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
Definition: TargetLowering.h:3114
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:299
Use.h
This defines the Use class.
llvm::Data
@ Data
Definition: SIMachineScheduler.h:56
llvm::getOffset
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
Definition: RuntimeDyld.cpp:170
llvm::ISD::STRICT_FP_TO_UINT
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:429
OP_VEXT1
@ OP_VEXT1
Definition: ARMISelLowering.cpp:8112
llvm::GlobalValue::hasExternalWeakLinkage
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:446
llvm::AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO
@ GLD1S_UXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:365
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1366
llvm::CCState::AnalyzeCallOperands
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
Definition: CallingConvLower.cpp:131
isSingletonEXTMask
static bool isSingletonEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
Definition: AArch64ISelLowering.cpp:8747
llvm::ISD::SMAX
@ SMAX
Definition: ISDOpcodes.h:627
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:447
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
llvm::AArch64ISD::SHL_PRED
@ SHL_PRED
Definition: AArch64ISelLowering.h:94
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::ISD::ATOMIC_LOAD_OR
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1144
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:867
llvm::AArch64ISD::FCMP
@ FCMP
Definition: AArch64ISelLowering.h:146
combineSVEPrefetchVecBaseImmOff
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG, unsigned ScalarSizeInBytes)
Combines a node carrying the intrinsic aarch64_sve_prf<T>_gather_scalar_offset into a node that uses ...
Definition: AArch64ISelLowering.cpp:16490
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::AArch64ISD::SUNPKLO
@ SUNPKLO
Definition: AArch64ISelLowering.h:301
LowerSVEIntrinsicIndex
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:14083
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:956
llvm::ISD::ArgFlagsTy::isSwiftSelf
bool isSwiftSelf() const
Definition: TargetCallingConv.h:97
llvm::Triple::isWindowsMSVCEnvironment
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:557
llvm::AArch64II::MO_TLS
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
Definition: AArch64BaseInfo.h:670
MachineRegisterInfo.h
llvm::EVT::changeTypeToInteger
EVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:114
llvm::AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU
@ SIGN_EXTEND_INREG_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:126
KnownBits.h
llvm::ShuffleVectorSDNode
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
Definition: SelectionDAGNodes.h:1484
llvm::AArch64ISD::LD4LANEpost
@ LD4LANEpost
Definition: AArch64ISelLowering.h:432
llvm::MVT::isScalableVector
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
Definition: MachineValueType.h:373
convertToScalableVector
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
Definition: AArch64ISelLowering.cpp:17912
llvm::AArch64ISD::LD3DUPpost
@ LD3DUPpost
Definition: AArch64ISelLowering.h:427
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:1960
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
llvm::ComputeValueVTs
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:124
llvm::CallingConv::AArch64_SVE_VectorCall
@ AArch64_SVE_VectorCall
Calling convention between AArch64 SVE functions.
Definition: CallingConv.h:242
getReductionSDNode
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:10937
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
performSTORECombine
static SDValue performSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:15254
llvm::yaml::isInteger
static bool isInteger(StringRef Val)
Definition: ELFYAML.cpp:1441
llvm::AArch64ISD::ADDlow
@ ADDlow
Definition: AArch64ISelLowering.h:63
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:130
llvm::AArch64ISD::STRICT_FCMP
@ STRICT_FCMP
Definition: AArch64ISelLowering.h:409
llvm::AArch64ISD::GLD1_MERGE_ZERO
@ GLD1_MERGE_ZERO
Definition: AArch64ISelLowering.h:352
performSelectCombine
static SDValue performSelectCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with the compare-mask instruct...
Definition: AArch64ISelLowering.cpp:15958
llvm::MVT::nxv8bf16
@ nxv8bf16
Definition: MachineValueType.h:238
MachineValueType.h
OP_VDUP2
@ OP_VDUP2
Definition: ARMISelLowering.cpp:8110
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
isPackedVectorType
static bool isPackedVectorType(EVT VT, SelectionDAG &DAG)
Returns true if VT's elements occupy the lowest bit positions of its associated register class withou...
Definition: AArch64ISelLowering.cpp:192
llvm::ISD::BRIND
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:942
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::ISD::ROTL
@ ROTL
Definition: ISDOpcodes.h:660
llvm::MaskedLoadSDNode::getPassThru
const SDValue & getPassThru() const
Definition: SelectionDAGNodes.h:2383
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
SetCCInfoAndKind::IsAArch64
bool IsAArch64
Definition: AArch64ISelLowering.cpp:13714
PerfectShuffleTable
static const unsigned PerfectShuffleTable[6561+1]
Definition: AArch64PerfectShuffle.h:25
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MaskedGatherSDNode::getPassThru
const SDValue & getPassThru() const
Definition: SelectionDAGNodes.h:2486
llvm::AArch64ISD::MVNIshift
@ MVNIshift
Definition: AArch64ISelLowering.h:164
llvm::TargetLowering::TargetLoweringOpt::CombineTo
bool CombineTo(SDValue O, SDValue N)
Definition: TargetLowering.h:3297
llvm::classifyEHPersonality
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Definition: EHPersonalities.cpp:21
llvm::LLT::fixed_vector
static LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelTypeImpl.h:75
llvm::ISD::VECREDUCE_UMAX
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1240
llvm::MVT::v4bf16
@ v4bf16
Definition: MachineValueType.h:147
llvm::AArch64ISD::SST1_UXTW_PRED
@ SST1_UXTW_PRED
Definition: AArch64ISelLowering.h:398
llvm::AArch64ISD::FCMLTz
@ FCMLTz
Definition: AArch64ISelLowering.h:224
llvm::AArch64FunctionInfo::setHasSwiftAsyncContext
void setHasSwiftAsyncContext(bool HasContext)
Definition: AArch64MachineFunctionInfo.h:401
performNEONPostLDSTCombine
static SDValue performNEONPostLDSTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Target-specific DAG combine function for NEON load/store intrinsics to merge base address updates.
Definition: AArch64ISelLowering.cpp:15273
llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition: MachineValueType.h:1411
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:746
llvm::User::getOperandUse
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:885
Instruction.h
llvm::AArch64ISD::CMEQz
@ CMEQz
Definition: AArch64ISelLowering.h:215
CommandLine.h
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1335
llvm::SelectionDAG::getVScale
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
Definition: SelectionDAG.h:956
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:963
llvm::TargetLowering::TargetLoweringOpt::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3284
llvm::MachineFunction::ArgRegPair
Structure used to represent pair of argument number after call lowering and register used to transfer...
Definition: MachineFunction.h:410
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Type::isArrayTy
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:225
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition: SelectionDAG.cpp:7426
performLD1ReplicateCombine
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:14711
isZerosVector
static bool isZerosVector(const SDNode *N)
isZerosVector - Check whether SDNode N is a zero-filled vector.
Definition: AArch64ISelLowering.cpp:2282
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:621
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:363
llvm::MinAlign
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:672
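A worked example for the entry above, assuming LLVM's Support headers; the result is the largest power of two dividing both arguments, so it can be checked at compile time since MinAlign is constexpr.

#include "llvm/Support/MathExtras.h"

using namespace llvm;

int main() {
  static_assert(MinAlign(16, 4) == 4, "largest power of two dividing both");
  static_assert(MinAlign(8, 12) == 4, "12 = 0b1100, lowest common factor is 4");
  static_assert(MinAlign(32, 32) == 32, "equal alignments are preserved");
  return 0;
}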
llvm::APInt::isNonNegative
bool isNonNegative() const
Determine if this APInt value is non-negative (>= 0).
Definition: APInt.h:369
llvm::MVT::v8f16
@ v8f16
Definition: MachineValueType.h:137
llvm::AArch64ISD::FMUL_PRED
@ FMUL_PRED
Definition: AArch64ISelLowering.h:88
llvm::AArch64_AM::encodeAdvSIMDModImmType6
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
Definition: AArch64AddressingModes.h:533
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:632
llvm::SelectionDAG::getMaskedStore
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
Definition: SelectionDAG.cpp:7666
llvm::ISD::STRICT_FP_TO_SINT
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:428
llvm::CCValAssign::AExt
@ AExt
Definition: CallingConvLower.h:39
llvm::AArch64ISD::FRINT_MERGE_PASSTHRU
@ FRINT_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:115
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1113
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition: ISDOpcodes.h:78
llvm::AArch64II::MO_G0
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
Definition: AArch64BaseInfo.h:644
llvm::AArch64ISD::CSEL
@ CSEL
Definition: AArch64ISelLowering.h:68
llvm::Triple::isOSBinFormatELF
bool isOSBinFormatELF() const
Tests whether the OS uses the ELF binary format.
Definition: Triple.h:632
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:694
llvm::Log2_64
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:602
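A quick sketch of the documented floor semantics, assuming the MathExtras.h header; the values are chosen purely for illustration.

#include "llvm/Support/MathExtras.h"
#include <cassert>

using namespace llvm;

int main() {
  assert(Log2_64(1) == 0);
  assert(Log2_64(4096) == 12);
  // Non-powers of two round down: 512 <= 1000 < 1024.
  assert(Log2_64(1000) == 9);
  return 0;
}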
llvm::MOStridedAccess
static const MachineMemOperand::Flags MOStridedAccess
Definition: AArch64InstrInfo.h:33
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
GlobalValue.h
isZIPMask
static bool isZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Definition: AArch64ISelLowering.cpp:8921
llvm::AArch64TargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: AArch64ISelLowering.cpp:16600
performVecReduceAddCombineWithUADDLP
static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:12278
llvm::MVT::nxv2bf16
@ nxv2bf16
Definition: MachineValueType.h:236
llvm::TargetLoweringBase::shouldLocalize
virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const
Check whether or not MI needs to be moved close to its uses.
Definition: TargetLoweringBase.cpp:2288
isConstantSplatVectorMaskForType
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT)
Definition: AArch64ISelLowering.cpp:13124
tryCombineToBSL
static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: AArch64ISelLowering.cpp:13016
llvm::AArch64ISD::MVNImsl
@ MVNImsl
Definition: AArch64ISelLowering.h:165
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1113
performFDivCombine
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point divide by power of two into fixed-point to floating-point conversion.
Definition: AArch64ISelLowering.cpp:12885
llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition: SelectionDAGNodes.h:1458
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:33
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::AArch64TargetLowering::isFMAFasterThanFMulAndFAdd
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
Definition: AArch64ISelLowering.cpp:12140
llvm::ISD::CTLZ
@ CTLZ
Definition: ISDOpcodes.h:668
llvm::AArch64ISD::ST4LANEpost
@ ST4LANEpost
Definition: AArch64ISelLowering.h:435
llvm::AArch64CC::VC
@ VC
Definition: AArch64BaseInfo.h:262
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::AArch64ISD::TRN1
@ TRN1
Definition: AArch64ISelLowering.h:180
IsSVECntIntrinsic
static bool IsSVECntIntrinsic(SDValue S)
Definition: AArch64ISelLowering.cpp:12453
llvm::AArch64RegisterInfo::hasSVEArgsOrReturn
static bool hasSVEArgsOrReturn(const MachineFunction *MF)
Definition: AArch64RegisterInfo.cpp:66
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
llvm::AArch64ISD::SST1_SCALED_PRED
@ SST1_SCALED_PRED
Definition: AArch64ISelLowering.h:397
llvm::AArch64ISD::CMGEz
@ CMGEz
Definition: AArch64ISelLowering.h:216
SelectionDAGNodes.h
llvm::AArch64ISD::SMAXV
@ SMAXV
Definition: AArch64ISelLowering.h:250
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:679
llvm::PatternMatch::m_ZExtOrSExt
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:1658
Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...
llvm::AArch64FunctionInfo::getBytesInStackArgArea
unsigned getBytesInStackArgArea() const
Definition: AArch64MachineFunctionInfo.h:181
llvm::AArch64ISD::SRL_PRED
@ SRL_PRED
Definition: AArch64ISelLowering.h:98
llvm::Failed
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::SDNode::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this node.
Definition: SelectionDAGNodes.h:692
llvm::ISD::Constant
@ Constant
Definition: ISDOpcodes.h:76
llvm::AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Definition: AArch64ISelLowering.cpp:17448
llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:729
llvm::SelectionDAG::getBoolExtOrTrunc
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
Definition: SelectionDAG.cpp:1281
llvm::AArch64TargetLowering::isAllActivePredicate
bool isAllActivePredicate(SDValue N) const
Definition: AArch64ISelLowering.cpp:18699
llvm::AArch64TargetLowering::lowerInterleavedStore
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a stN intrinsic.
Definition: AArch64ISelLowering.cpp:11814
llvm::AArch64ISD::GLDNT1S_MERGE_ZERO
@ GLDNT1S_MERGE_ZERO
Definition: AArch64ISelLowering.h:390
llvm::ISD::ArgFlagsTy::isByVal
bool isByVal() const
Definition: TargetCallingConv.h:85
llvm::AArch64ISD::LD1_MERGE_ZERO
@ LD1_MERGE_ZERO
Definition: AArch64ISelLowering.h:337
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalizeOps
bool isBeforeLegalizeOps() const
Definition: TargetLowering.h:3537
llvm::SelectionDAG::getTargetBlockAddress
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:730
llvm::AArch64Subtarget::hasLS64
bool hasLS64() const
Definition: AArch64Subtarget.h:542
llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:640
llvm::AArch64ISD::RET_FLAG
@ RET_FLAG
Definition: AArch64ISelLowering.h:66
llvm::AArch64FunctionInfo::setVarArgsFPRSize
void setVarArgsFPRSize(unsigned Size)
Definition: AArch64MachineFunctionInfo.h:320
llvm::AArch64ISD::MRS
@ MRS
Definition: AArch64ISelLowering.h:289
llvm::ISD::SETGE
@ SETGE
Definition: ISDOpcodes.h:1377
llvm::AArch64_AM::isLogicalImmediate
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
Definition: AArch64AddressingModes.h:275
llvm::CC_AArch64_DarwinPCS
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::AArch64Subtarget::isLittleEndian
bool isLittleEndian() const
Definition: AArch64Subtarget.h:498
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:150
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:720
llvm::AArch64ISD::GLD1S_SCALED_MERGE_ZERO
@ GLD1S_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:362
llvm::User
Definition: User.h:44
llvm::AArch64_AM::isAdvSIMDModImmType5
static bool isAdvSIMDModImmType5(uint64_t Imm)
Definition: AArch64AddressingModes.h:511
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:951
llvm::AArch64Subtarget::getMaximumJumpTableSize
unsigned getMaximumJumpTableSize() const
Definition: AArch64Subtarget.h:444
llvm::ISD::CopyToReg
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
llvm::ISD::CATCHRET
@ CATCHRET
CATCHRET - Represents a return from a catch block funclet.
Definition: ISDOpcodes.h:999
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:747
llvm::TargetLoweringBase::setOperationPromotedToType
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
Definition: TargetLowering.h:2274
llvm::SelectionDAG::getTargetLoweringInfo
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:443
llvm::AArch64Subtarget::isTargetILP32
bool isTargetILP32() const
Definition: AArch64Subtarget.h:511
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
Intrinsics.h
llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2352
llvm::AArch64ISD::ORRi
@ ORRi
Definition: AArch64ISelLowering.h:169
getPackedSVEVectorVT
static EVT getPackedSVEVectorVT(EVT VT)
Definition: AArch64ISelLowering.cpp:130
llvm::AArch64ISD::VSRI
@ VSRI
Definition: AArch64ISelLowering.h:202
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:56
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1348
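An illustrative fragment rather than a standalone program: it only compiles inside an LLVM build tree, and getSplatZero is a hypothetical helper name for this sketch, not an API defined in this file.

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Hypothetical helper: build an all-zeros value of type VT. Per the
// entry above, getConstant on a vector VT implicitly splats the scalar
// across all lanes.
static SDValue getSplatZero(SelectionDAG &DAG, const SDLoc &DL, EVT VT) {
  return DAG.getConstant(0, DL, VT);
}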
llvm::AArch64ISD::LASTB
@ LASTB
Definition: AArch64ISelLowering.h:308
Twine.h
isUZP_v_undef_Mask
static bool isUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of "vector_shuffle v,...
Definition: AArch64ISelLowering.cpp:8985
llvm::EVT::is64BitVector
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:181
llvm::JumpTableSDNode
Definition: SelectionDAGNodes.h:1803
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:309
llvm::isShiftedMask_64
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:485
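A few concrete cases for the predicate above, assuming the MathExtras.h header; the bit patterns are illustrative only.

#include "llvm/Support/MathExtras.h"
#include <cassert>

using namespace llvm;

int main() {
  assert(isShiftedMask_64(0x00FF0000)); // one contiguous run of ones
  assert(isShiftedMask_64(0x1));        // the run may start at bit 0
  assert(!isShiftedMask_64(0));         // the run must be non-empty
  assert(!isShiftedMask_64(0xFF00FF));  // two separate runs do not qualify
  return 0;
}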
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3170
getSVEContainerType
static MVT getSVEContainerType(EVT ContentTy)
Definition: AArch64ISelLowering.cpp:14627
llvm::MVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: MachineValueType.h:1062
llvm::CC_AArch64_WebKit_JS
bool CC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::AArch64ISD::SRA_PRED
@ SRA_PRED
Definition: AArch64ISelLowering.h:97
llvm::AArch64ISD::UDIV_PRED
@ UDIV_PRED
Definition: AArch64ISelLowering.h:100
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
emitConditionalComparison
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
Definition: AArch64ISelLowering.cpp:2593
llvm::SelectionDAG::MaskedValueIsZero
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
Definition: SelectionDAG.cpp:2438
llvm::AArch64FunctionInfo::setBytesInStackArgArea
void setBytesInStackArgArea(unsigned bytes)
Definition: AArch64MachineFunctionInfo.h:182
llvm::AArch64TargetLowering::getNumInterleavedAccesses
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
Definition: AArch64ISelLowering.cpp:11642
llvm::AArch64_AM::isAdvSIMDModImmType2
static bool isAdvSIMDModImmType2(uint64_t Imm)
Definition: AArch64AddressingModes.h:466
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:883
llvm::AArch64ISD::MUL_PRED
@ MUL_PRED
Definition: AArch64ISelLowering.h:90
TargetMachine.h
llvm::TargetLowering::DAGCombinerInfo
Definition: TargetLowering.h:3525
llvm::CCState::AnalyzeReturn
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
Definition: CallingConvLower.cpp:118
WidenVector
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
Definition: AArch64ISelLowering.cpp:8489
llvm::AArch64ISD::CMLEz
@ CMLEz
Definition: AArch64ISelLowering.h:218
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:735
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:321
llvm::ISD::SRA
@ SRA
Definition: ISDOpcodes.h:658
llvm::CallingConv::Swift
@ Swift
Definition: CallingConv.h:73
llvm::CallingConv::C
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition: TargetLowering.h:233
performSetccMergeZeroCombine
static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15767
llvm::ISD::LLROUND
@ LLROUND
Definition: ISDOpcodes.h:887
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:481
llvm::codeview::ClassOptions::None
@ None
llvm::isIntOrFPConstant
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
Definition: SelectionDAGNodes.h:1696
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:237
llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition: Instructions.h:1939
removeRedundantInsertVectorElt
static SDValue removeRedundantInsertVectorElt(SDNode *N)
Definition: AArch64ISelLowering.cpp:16534
llvm::Mips::GPRIdx
@ GPRIdx
Definition: MipsRegisterBankInfo.cpp:44
llvm::MVT::nxv2f16
@ nxv2f16
Definition: MachineValueType.h:229
llvm::AArch64ISD::FRSQRTE
@ FRSQRTE
Definition: AArch64ISelLowering.h:297
llvm::AArch64CC::HS
@ HS
Definition: AArch64BaseInfo.h:257
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
llvm::MachinePointerInfo::getGOT
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
Definition: MachineOperand.cpp:1012
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2168
llvm::AArch64TargetLowering::getIRStackGuard
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
Definition: AArch64ISelLowering.cpp:17578
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:596
llvm::TargetLoweringBase::MaxLoadsPerMemcmpOptSize
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3129
llvm::ShuffleVectorSDNode::isSplat
bool isSplat() const
Definition: SelectionDAGNodes.h:1506
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Function::arg_end
arg_iterator arg_end()
Definition: Function.h:803
llvm::AArch64ISD::ADD_PRED
@ ADD_PRED
Definition: AArch64ISelLowering.h:80
llvm::isUIntN
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:455
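A minimal check of the documented bound, assuming LLVM's Support headers.

#include "llvm/Support/MathExtras.h"
#include <cassert>

using namespace llvm;

int main() {
  assert(isUIntN(8, 255));  // fits in 8 unsigned bits
  assert(!isUIntN(8, 256)); // needs 9 bits
  assert(isUIntN(1, 1));    // a 1-bit field holds 0 and 1
  return 0;
}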
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::MVT::v4f64
@ v4f64
Definition: MachineValueType.h:174
llvm::SDNode::uses
iterator_range< use_iterator > uses()
Definition: SelectionDAGNodes.h:781
llvm::AArch64ISD::MOVIedit
@ MOVIedit
Definition: AArch64ISelLowering.h:161
llvm::AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
@ SINT_TO_FP_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:123
llvm::AArch64TargetLowering::isExtractSubvectorCheap
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
Definition: AArch64ISelLowering.cpp:12228
llvm::ISD::ATOMIC_LOAD_AND
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1142
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:145
calculatePreExtendType
static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG)
Calculates what the pre-extend type is, based on the extension operation node provided by Extend.
Definition: AArch64ISelLowering.cpp:12478
llvm::Triple::isOSBinFormatMachO
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:645
llvm::MaskedLoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2380
llvm::AArch64TargetLowering::getTargetMMOFlags
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
Definition: AArch64ISelLowering.cpp:11648
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:145
getGatherScatterIndexIsExtended
bool getGatherScatterIndexIsExtended(SDValue Index)
Definition: AArch64ISelLowering.cpp:4243
llvm::Instruction
Definition: Instruction.h:46
llvm::AArch64ISD::UADDLP
@ UADDLP
Definition: AArch64ISelLowering.h:240
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:146
llvm::RetCC_AArch64_AAPCS
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Concat
static constexpr int Concat[]
Definition: X86InterleavedAccess.cpp:239
llvm::CC_AArch64_AAPCS
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
tryCombineToEXTR
static SDValue tryCombineToEXTR(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
EXTR instruction extracts a contiguous chunk of bits from two existing registers viewed as a high/low...
Definition: AArch64ISelLowering.cpp:12976
ShuffleOps
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
Definition: InstCombineVectorOps.cpp:686
llvm::AArch64ISD::ST1_PRED
@ ST1_PRED
Definition: AArch64ISelLowering.h:393
legalizeSVEGatherPrefetchOffsVec
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG)
Legalize the gather prefetch (scalar + vector addressing mode) when the offset vector is an unpacked ...
Definition: AArch64ISelLowering.cpp:16467
llvm::AArch64ISD::STZ2G
@ STZ2G
Definition: AArch64ISelLowering.h:440
llvm::ShuffleVectorSDNode::getMask
ArrayRef< int > getMask() const
Definition: SelectionDAGNodes.h:1496
llvm::AArch64TargetLowering::getScalingFactorCost
InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition: AArch64ISelLowering.cpp:12124
llvm::AArch64ISD::UMINV_PRED
@ UMINV_PRED
Definition: AArch64ISelLowering.h:258
checkValueWidth
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
Definition: AArch64ISelLowering.cpp:15412
llvm::TargetLoweringBase::setIndexedStoreAction
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
Definition: TargetLowering.h:2225
llvm::AArch64ISD::GLD1S_UXTW_MERGE_ZERO
@ GLD1S_UXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:363
llvm::AArch64ISD::DUPLANE16
@ DUPLANE16
Definition: AArch64ISelLowering.h:154
llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:739
llvm::IRBuilderBase::getInt8Ty
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:508
llvm::AArch64ISD::SST1_PRED
@ SST1_PRED
Definition: AArch64ISelLowering.h:396
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1631
llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition: Instructions.h:748
llvm::ShuffleVectorInst::isExtractSubvectorMask
static bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
Definition: Instructions.cpp:2233
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::MVT::v16f16
@ v16f16
Definition: MachineValueType.h:138
llvm::TargetLoweringBase::getSDagStackGuard
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
Definition: TargetLoweringBase.cpp:1991
llvm::AArch64FunctionInfo::incNumLocalDynamicTLSAccesses
void incNumLocalDynamicTLSAccesses()
Definition: AArch64MachineFunctionInfo.h:299
llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:655
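For example (a sketch assuming llvm/ADT/APInt.h and <cassert>):

    llvm::APInt Hi = llvm::APInt::getHighBitsSet(/*numBits=*/8, /*hiBitsSet=*/3);
    assert(Hi.getZExtValue() == 0xE0);  // 0b1110'0000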
APFloat.h
This file declares a class to represent arbitrary precision floating point values and provide a varie...
llvm::MVT::nxv4i8
@ nxv4i8
Definition: MachineValueType.h:198
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:882
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:881
llvm::AArch64CC::LE
@ LE
Definition: AArch64BaseInfo.h:268
llvm::MVT::nxv4f32
@ nxv4f32
Definition: MachineValueType.h:242
OperandTraits.h
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1726
llvm::CallingConv::Fast
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
llvm::AArch64ISD::ZIP2
@ ZIP2
Definition: AArch64ISelLowering.h:177
ErrorHandling.h
llvm::AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO
@ GLDFF1S_SXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:382
ReplaceCMP_SWAP_128Results
static void ReplaceCMP_SWAP_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:17160
llvm::bitc::NoNaNs
@ NoNaNs
Definition: LLVMBitCodes.h:468
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:901
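A sketch of the scalar-vs-splat behavior described above (assumes an LLVMContext Ctx and the usual IR headers):

    llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
    llvm::Constant *Scalar = llvm::ConstantInt::get(I32, 42);  // i32 42
    llvm::Type *V4 = llvm::FixedVectorType::get(I32, 4);
    llvm::Constant *Splat = llvm::ConstantInt::get(V4, 42);    // <4 x i32> splat of 42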
llvm::AArch64FunctionInfo::setSRetReturnReg
void setSRetReturnReg(unsigned Reg)
Definition: AArch64MachineFunctionInfo.h:323
llvm::AArch64FunctionInfo::getVarArgsStackIndex
int getVarArgsStackIndex() const
Definition: AArch64MachineFunctionInfo.h:307
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7203
llvm::TargetLowering::C_Immediate
@ C_Immediate
Definition: TargetLowering.h:4142
llvm::AArch64ISD::FCMEQ
@ FCMEQ
Definition: AArch64ISelLowering.h:210
llvm::AArch64CC::PL
@ PL
Definition: AArch64BaseInfo.h:260
llvm::AArch64ISD::VSLI
@ VSLI
Definition: AArch64ISelLowering.h:201
EmitVectorComparison
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, AArch64CC::CondCode CC, bool NoNans, EVT VT, const SDLoc &dl, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:10772
DebugLoc.h
llvm::MachineFrameInfo::hasMustTailInVarArgFunc
bool hasMustTailInVarArgFunc() const
Returns true if the function is variadic and contains a musttail call.
Definition: MachineFrameInfo.h:600
llvm::ms_demangle::QualifierMangleMode::Drop
@ Drop
isVShiftLImm
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
Definition: AArch64ISelLowering.cpp:10671
llvm::EVT::changeVectorElementType
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
UseTlsOffset
static Value * UseTlsOffset(IRBuilderBase &IRB, unsigned Offset)
Definition: AArch64ISelLowering.cpp:17568
llvm::SelectionDAG::SplitVectorOperand
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
Definition: SelectionDAG.h:1929
llvm::MachineFrameInfo::getObjectOffset
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Definition: MachineFrameInfo.h:492
llvm::AArch64ISD::FABS_MERGE_PASSTHRU
@ FABS_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:109
llvm::TargetLoweringBase::MaxStoresPerMemcpy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
Definition: TargetLowering.h:3106
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
tryAdvSIMDModImm64
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
Definition: AArch64ISelLowering.cpp:9632
llvm::AArch64ISD::FP_ROUND_MERGE_PASSTHRU
@ FP_ROUND_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:120
llvm::BitVector
Definition: BitVector.h:74
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition: CallingConvLower.h:155
llvm::ISD::SMULO
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:318
llvm::APInt::sle
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1259
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1361
llvm::ISD::UNSIGNED_SCALED
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1319
llvm::ISD::ABDS
@ ABDS
Definition: ISDOpcodes.h:621
llvm::MemSDNode::isNonTemporal
bool isNonTemporal() const
Definition: SelectionDAGNodes.h:1288
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::ISD::SPLAT_VECTOR
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:590
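A typical way such a node is built during lowering (a sketch; DAG, DL, VT, and the scalar Val are assumed to be in scope):

    SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, Val);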
PatternMatch.h
llvm::TargetLoweringBase::insertSSPDeclarations
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
Definition: TargetLoweringBase.cpp:1978
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition: SelectionDAGNodes.h:1712
llvm::APInt::countTrailingZeros
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1700
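For instance (illustrative):

    llvm::APInt X(32, 8);                 // 0b1000
    assert(X.countTrailingZeros() == 3);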
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1132
llvm::AArch64Subtarget::getProcFamily
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
Definition: AArch64Subtarget.h:337
llvm::ISD::FSINCOS
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:915
llvm::TargetLoweringBase::TypeWidenVector
@ TypeWidenVector
Definition: TargetLowering.h:213
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:151
llvm::AArch64TargetLowering::hasPairedLoad
bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
Definition: AArch64ISelLowering.cpp:11628
llvm::ConstantSDNode::isOne
bool isOne() const
Definition: SelectionDAGNodes.h:1565
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1304
llvm::AArch64ISD::VSHL
@ VSHL
Definition: AArch64ISelLowering.h:189
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:650
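For example, building the IR type <4 x float> (a sketch assuming an LLVMContext Ctx):

    llvm::FixedVectorType *V4F32 =
        llvm::FixedVectorType::get(llvm::Type::getFloatTy(Ctx), 4);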
llvm::AArch64ISD::SETCC_MERGE_ZERO
@ SETCC_MERGE_ZERO
Definition: AArch64ISelLowering.h:131
llvm::CallInst::isTailCall
bool isTailCall() const
Definition: Instructions.h:1669
llvm::MVT::v1i64
@ v1i64
Definition: MachineValueType.h:117
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::NVPTX::PTXLdStInstCode::Scalar
@ Scalar
Definition: NVPTX.h:122
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition: TargetLowering.h:2116
foldVectorXorShiftIntoCmp
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Turn vector tests of the signbit in the form of: xor (sra X, elt_size(X)-1), -1 into: cmge X,...
Definition: AArch64ISelLowering.cpp:12240
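A sketch of the input pattern in DAG terms (X, VT, EltBits, DAG, and DL assumed in scope; the combine itself rewrites this to an AArch64 compare-against-zero node):

    // xor (sra X, EltBits-1), -1: all-ones lane exactly when the lane is >= 0
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X,
                              DAG.getConstant(EltBits - 1, DL, VT));
    SDValue Pattern = DAG.getNOT(DL, Sra, VT);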
performMulCombine
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:12610
llvm::AArch64ISD::UUNPKLO
@ UUNPKLO
Definition: AArch64ISelLowering.h:303
llvm::AArch64ISD::SBC
@ SBC
Definition: AArch64ISelLowering.h:77
llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition: SelectionDAGNodes.h:1125
llvm::AArch64_AM::getFP32Imm
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
Definition: AArch64AddressingModes.h:393
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
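For example (illustrative):

    llvm::EVT VT = llvm::MVT::v4f32;
    llvm::EVT IntVT = VT.changeVectorElementTypeToInteger();  // v4i32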
llvm::array_lengthof
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1377
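Being constexpr, it works in static contexts (a sketch):

    int Buf[4];
    static_assert(llvm::array_lengthof(Buf) == 4, "length is part of the type");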
llvm::SDValue::getValueSizeInBits
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
Definition: SelectionDAGNodes.h:192
llvm::AArch64ISD::SADDV_PRED
@ SADDV_PRED
Definition: AArch64ISelLowering.h:253
llvm::isIntN
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:460
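A minimal sketch of the signed-range check (illustrative; assumes <cassert> and llvm/Support/MathExtras.h):

    assert(llvm::isIntN(8, 127));    // i8 range is [-128, 127]
    assert(llvm::isIntN(8, -128));
    assert(!llvm::isIntN(8, 128));   // needs 9 signed bits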
llvm::AArch64ISD::SHADD
@ SHADD
Definition: AArch64ISelLowering.h:232
splitStores
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Definition: AArch64ISelLowering.cpp:14921
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:273
llvm::AArch64ISD::UITOF
@ UITOF
Definition: AArch64ISelLowering.h:280
llvm::StringRef::getAsInteger
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:510
llvm::LinearPolySize< ElementCount >::getFixed
static ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:284
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:91
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition: CallingConvLower.h:145
llvm::AArch64TargetLowering::shouldExpandAtomicRMWInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Definition: AArch64ISelLowering.cpp:17408
isZIP_v_undef_Mask
static bool isZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of "vector_shuffle v,...
Definition: AArch64ISelLowering.cpp:8966
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:315
llvm::MemSDNode::isVolatile
bool isVolatile() const
Definition: SelectionDAGNodes.h:1287
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:78
llvm::CallingConv::ID
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:2049
llvm::TargetLoweringBase::getTypeToTransformTo
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
Definition: TargetLowering.h:941
llvm::StoreSDNode::isTruncatingStore
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Definition: SelectionDAGNodes.h:2307
Type.h
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1362
llvm::MVT::nxv4i16
@ nxv4i16
Definition: MachineValueType.h:206
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::Triple::isOSMSVCRT
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:584
performExtractVectorEltCombine
static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:13410
llvm::AArch64TargetLowering::getOptimalMemOpType
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
Definition: AArch64ISelLowering.cpp:11976
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:96
llvm::AArch64TargetLowering::lowerInterleavedLoad
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a ldN intrinsic.
Definition: AArch64ISelLowering.cpp:11685
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1367
llvm::MVT::getScalarType
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Definition: MachineValueType.h:515
LowerADDC_ADDE_SUBC_SUBE
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3167
llvm::IRBuilderBase::CreatePointerCast
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2159
llvm::AArch64ISD::UQSHL_I
@ UQSHL_I
Definition: AArch64ISelLowering.h:195
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:683
llvm::AArch64_AM::isAdvSIMDModImmType9
static bool isAdvSIMDModImmType9(uint64_t Imm)
Definition: AArch64AddressingModes.h:573
performSVEAndCombine
static SDValue performSVEAndCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition: AArch64ISelLowering.cpp:13151
llvm::CallingConv::SwiftTail
@ SwiftTail
SwiftTail - This follows the Swift calling convention in how arguments are passed but guarantees tail...
Definition: CallingConv.h:92
llvm::AArch64ISD::PTEST
@ PTEST
Definition: AArch64ISelLowering.h:320
llvm::CallingConv::CXX_FAST_TLS
@ CXX_FAST_TLS
Definition: CallingConv.h:76
changeIntCCToAArch64CC
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC)
changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 CC
Definition: AArch64ISelLowering.cpp:2301
LowerTruncateVectorStore
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT, EVT MemVT, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:4533
splitInt128
static std::pair< SDValue, SDValue > splitInt128(SDValue N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:17099
llvm::PatternMatch::m_ExtractElt
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
Definition: PatternMatch.h:1502
isOperandOfVmullHighP64
static bool isOperandOfVmullHighP64(Value *Op)
Check if Op could be used with vmull_high_p64 intrinsic.
Definition: AArch64ISelLowering.cpp:11520
llvm::AArch64FunctionInfo::setVarArgsFPRIndex
void setVarArgsFPRIndex(int Index)
Definition: AArch64MachineFunctionInfo.h:317
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:78
llvm::APInt::ashr
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:963
llvm::MVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: MachineValueType.h:340
llvm::ISD::POST_INC
@ POST_INC
Definition: ISDOpcodes.h:1304
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:250
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:177
llvm::TargetLoweringBase::getMemValueType
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Definition: TargetLowering.h:1428
getPredicateForScalableVector
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
Definition: AArch64ISelLowering.cpp:17896
findEXTRHalf
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, bool &FromHi)
An EXTR instruction is made up of two shifts, ORed together.
Definition: AArch64ISelLowering.cpp:12954
AArch64AddressingModes.h
llvm::MVT::f80
@ f80
Definition: MachineValueType.h:57
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:873
llvm::VTSDNode::getVT
EVT getVT() const
Definition: SelectionDAGNodes.h:2222
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
llvm::TargetLoweringBase::PredictableSelectIsExpensive
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
Definition: TargetLowering.h:3147
NarrowVector
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
Definition: AArch64ISelLowering.cpp:8509
setInfoSVEStN
static bool setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI)
Set the IntrinsicInfo for the aarch64_sve_st<N> intrinsics.
Definition: AArch64ISelLowering.cpp:11143
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:377
llvm::TargetRegisterInfo::regmaskSubsetEqual
bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const
Return true if all bits that are set in mask0 are also set in mask1.
Definition: TargetRegisterInfo.cpp:491
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:77
llvm::TargetLowering::TargetLoweringOpt
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Definition: TargetLowering.h:3283
llvm::ISD::BlockAddress
@ BlockAddress
Definition: ISDOpcodes.h:84
performVSelectCombine
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15896
llvm::DataLayout::getPrefTypeAlign
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:834
llvm::MVT::v4i64
@ v4i64
Definition: MachineValueType.h:120
llvm::AArch64TargetLowering::getPromotedVTForPredicate
EVT getPromotedVTForPredicate(EVT VT) const
Definition: AArch64ISelLowering.cpp:18703
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1371
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:388
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:463
areExtractExts
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
Definition: AArch64ISelLowering.cpp:11504
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2185
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
LowerPREFETCH
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3234
llvm::MVT::nxv16i8
@ nxv16i8
Definition: MachineValueType.h:200
llvm::AArch64ISD::FCVTZU_MERGE_PASSTHRU
@ FCVTZU_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:124
llvm::APInt::isOneValue
bool isOneValue() const
Determine if this is a value of 1.
Definition: APInt.h:416
llvm::TargetLowering::C_Other
@ C_Other
Definition: TargetLowering.h:4143
llvm::TargetLoweringBase::MaxStoresPerMemcpyOptSize
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3108
llvm::DataLayout::isBigEndian
bool isBigEndian() const
Definition: DataLayout.h:242
VectorUtils.h
llvm::TargetLowering::softenSetCCOperands
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
Definition: TargetLowering.cpp:283
llvm::AtomicRMWInst::UMin
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:760
llvm::ISD::SMIN
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:626
performUzpCombine
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15002
llvm::ISD::FMINIMUM
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:911
llvm::TargetLoweringBase::getMaximumJumpTableSize
unsigned getMaximumJumpTableSize() const
Return upper limit for number of entries in a jump table.
Definition: TargetLoweringBase.cpp:2011
llvm::cl::opt< bool >
llvm::MaskedScatterSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2516
llvm::AArch64ISD::GLD1_IMM_MERGE_ZERO
@ GLD1_IMM_MERGE_ZERO
Definition: AArch64ISelLowering.h:358
llvm::CallingConv::AArch64_VectorCall
@ AArch64_VectorCall
Definition: CallingConv.h:239
tryCombineCRC32
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:14060
llvm::APFloat
Definition: APFloat.h:701
llvm::AArch64_AM::isAdvSIMDModImmType1
static bool isAdvSIMDModImmType1(uint64_t Imm)
Definition: AArch64AddressingModes.h:451
llvm::SDNode::use_begin
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
Definition: SelectionDAGNodes.h:775
getPTest
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, AArch64CC::CondCode Cond)
Definition: AArch64ISelLowering.cpp:14203
llvm::AArch64ISD::SPLICE
@ SPLICE
Definition: AArch64ISelLowering.h:186
llvm::LoadInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:273
GenericSetCCInfo::CC
ISD::CondCode CC
Definition: AArch64ISelLowering.cpp:13694
llvm::TargetLoweringBase::getIRStackGuard
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
Definition: TargetLoweringBase.cpp:1964
llvm::CC_AArch64_DarwinPCS_ILP32_VarArg
bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
SetCCInfoAndKind::Info
SetCCInfo Info
Definition: AArch64ISelLowering.cpp:13713
llvm::AArch64TargetLowering::isIntDivCheap
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
Definition: AArch64ISelLowering.cpp:17724
llvm::isNullFPConstant
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
Definition: SelectionDAG.cpp:9532
llvm::ISD::SADDO
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:310
llvm::AArch64ISD::CMHI
@ CMHI
Definition: AArch64ISelLowering.h:208
llvm::TargetLoweringBase::getFrameIndexTy
MVT getFrameIndexTy(const DataLayout &DL) const
Return the type for frame index, which is determined by the alloca address space specified through th...
Definition: TargetLowering.h:358
llvm::AArch64ISD::ST3post
@ ST3post
Definition: AArch64ISelLowering.h:417
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::GlobalValue
Definition: GlobalValue.h:44
AArch64CallingConvention.h
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::AArch64TargetLowering::shouldExpandAtomicLoadInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
Definition: AArch64ISelLowering.cpp:17401
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
llvm::MVT::i64x8
@ i64x8
Definition: MachineValueType.h:273
llvm::AArch64ISD::SDOT
@ SDOT
Definition: AArch64ISelLowering.h:244
llvm::AArch64TargetLowering::AArch64TargetLowering
AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI)
Definition: AArch64ISelLowering.cpp:235
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:80
getAArch64Cmp
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, const SDLoc &dl)
Definition: AArch64ISelLowering.cpp:2863
llvm::AArch64ISD::SST1_SXTW_SCALED_PRED
@ SST1_SXTW_SCALED_PRED
Definition: AArch64ISelLowering.h:401
llvm::AArch64ISD::FRECPE
@ FRECPE
Definition: AArch64ISelLowering.h:295
isAllActivePredicate
static bool isAllActivePredicate(SDValue N)
Definition: AArch64ISelLowering.cpp:14286
llvm::AArch64ISD::INDEX_VECTOR
@ INDEX_VECTOR
Definition: AArch64ISelLowering.h:328
llvm::AArch64ISD::SQSHL_I
@ SQSHL_I
Definition: AArch64ISelLowering.h:194
llvm::AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU
@ ZERO_EXTEND_INREG_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:127
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:876
llvm::ISD::VECREDUCE_FMIN
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1229
TargetCallingConv.h
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
llvm::CCValAssign::SExt
@ SExt
Definition: CallingConvLower.h:37
llvm::AArch64ISD::SDIV_PRED
@ SDIV_PRED
Definition: AArch64ISelLowering.h:93
getGatherVecOpcode
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend)
Definition: AArch64ISelLowering.cpp:4175
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:93
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::AArch64II::MO_DLLIMPORT
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: AArch64BaseInfo.h:675
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::AArch64ISD::FMINNM_PRED
@ FMINNM_PRED
Definition: AArch64ISelLowering.h:85
llvm::MVT::v2i64
@ v2i64
Definition: MachineValueType.h:118
llvm::TargetLoweringBase::setHasExtractBitsInsn
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
Definition: TargetLowering.h:2151
llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition: CodeGen.h:44
llvm::AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
Definition: AArch64ISelLowering.cpp:17643
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition: MachineFrameInfo.h:451
RuntimeLibcalls.h
llvm::AArch64ISD::UHADD
@ UHADD
Definition: AArch64ISelLowering.h:233
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:786
llvm::AArch64::rmMask
@ rmMask
Definition: AArch64ISelLowering.h:474
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1587
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:239
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:894
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1332
Scaled
@ Scaled
Definition: ARCInstrInfo.cpp:35
llvm::AArch64::SVEBitsPerBlock
static constexpr unsigned SVEBitsPerBlock
Definition: AArch64BaseInfo.h:703
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1554
llvm::AArch64::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
Definition: AArch64FastISel.cpp:5103
llvm::ISD::VECREDUCE_ADD
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1233
llvm::tgtok::Int
@ Int
Definition: TGLexer.h:51
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:921
llvm::ISD::VECREDUCE_SMAX
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1238
llvm::AArch64ISD::LD1RQ_MERGE_ZERO
@ LD1RQ_MERGE_ZERO
Definition: AArch64ISelLowering.h:343
AArch64ExpandImm.h
isConcatMask
static bool isConcatMask(ArrayRef< int > Mask, EVT VT, bool SplitLHS)
Definition: AArch64ISelLowering.cpp:9056
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
GPRArgRegs
static const MCPhysReg GPRArgRegs[]
Definition: ARMISelLowering.cpp:153
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1471
areExtractShuffleVectors
static bool areExtractShuffleVectors(Value *Op1, Value *Op2)
Check if both Op1 and Op2 are shufflevector extracts of either the lower or upper half of the vector ...
Definition: AArch64ISelLowering.cpp:11463
isCMN
static bool isCMN(SDValue Op, ISD::CondCode CC)
Definition: AArch64ISelLowering.cpp:2471
llvm::AArch64TargetLowering::shouldExpandShift
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override
Return true if SHIFT instructions should be expanded to SHIFT_PARTS instructions, and false if a libr...
Definition: AArch64ISelLowering.cpp:17669
llvm::ISD::AssertZext
@ AssertZext
Definition: ISDOpcodes.h:62
llvm::AArch64ISD::SST1_IMM_PRED
@ SST1_IMM_PRED
Definition: AArch64ISelLowering.h:402
llvm::LegalityPredicates::all
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
Definition: LegalizerInfo.h:220
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:197
llvm::APInt::logBase2
unsigned logBase2() const
Definition: APInt.h:1811
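For a power of two this returns the exponent (illustrative):

    llvm::APInt P(32, 16);
    assert(P.logBase2() == 4);  // 16 == 1u << 4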
llvm::ForwardedRegister
Describes a register that needs to be forwarded from the prologue to a musttail call.
Definition: CallingConvLower.h:167
llvm::AArch64TargetLowering::isProfitableToHoist
bool isProfitableToHoist(Instruction *I) const override
Check if it is profitable to hoist instruction in then/else to if.
Definition: AArch64ISelLowering.cpp:11350
llvm::AArch64TargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ISD::SETCC ValueType.
Definition: AArch64ISelLowering.cpp:1610
llvm::ISD::TRAP
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1088
llvm::AArch64II::MO_NC
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
Definition: AArch64BaseInfo.h:664
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:38
llvm::AArch64ISD::VASHR
@ VASHR
Definition: AArch64ISelLowering.h:191
llvm::ISD::isConstantSplatVector
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
Definition: SelectionDAG.cpp:141
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
isConstant
static bool isConstant(const MachineInstr &MI)
Definition: AMDGPUInstructionSelector.cpp:2311
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::AArch64TargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
Provide custom lowering hooks for some operations.
Definition: AArch64ISelLowering.cpp:4725
llvm::AArch64II::MO_PAGEOFF
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
Definition: AArch64BaseInfo.h:628
llvm::ISD::SPONENTRY
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:106
llvm::MVT::v1f32
@ v1f32
Definition: MachineValueType.h:154
getPromotedVTForPredicate
static EVT getPromotedVTForPredicate(EVT VT)
Definition: AArch64ISelLowering.cpp:170
llvm::DenseMap
Definition: DenseMap.h:715
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:761
llvm::AArch64ISD::EXT
@ EXT
Definition: AArch64ISelLowering.h:185
llvm::ISD::OutputArg
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
Definition: TargetCallingConv.h:233
llvm::DemandedBits
Definition: DemandedBits.h:40
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
llvm::ISD::VECREDUCE_FADD
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1225
llvm::TargetLoweringBase::setStackPointerRegisterToSaveRestore
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
Definition: TargetLowering.h:2134
performVectorTruncateCombine
static SDValue performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:13328
llvm::AArch64TargetLowering::CCAssignFnForCall
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Selects the correct CCAssignFn for a given CallingConvention value.
Definition: AArch64ISelLowering.cpp:5036
llvm::AArch64ISD::CSINV
@ CSINV
Definition: AArch64ISelLowering.h:69
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:896
llvm::AArch64FunctionInfo::getVarArgsGPRIndex
int getVarArgsGPRIndex() const
Definition: AArch64MachineFunctionInfo.h:310
llvm::TargetLowering::CW_Register
@ CW_Register
Definition: TargetLowering.h:4157
llvm::AArch64FunctionInfo
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
Definition: AArch64MachineFunctionInfo.h:37
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:20
llvm::ISD::LRINT
@ LRINT
Definition: ISDOpcodes.h:888
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:7912
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalize
bool isBeforeLegalize() const
Definition: TargetLowering.h:3536
llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:134
llvm::IRBuilderBase::getInt8PtrTy
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:561
llvm::SelectionDAG::getNOT
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition: SelectionDAG.cpp:1321
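A sketch of the equivalence stated above (DAG, DL, Val, and VT assumed in scope):

    SDValue NotVal = DAG.getNOT(DL, Val, VT);
    // behaves like:
    SDValue Expanded =
        DAG.getNode(ISD::XOR, DL, VT, Val, DAG.getAllOnesConstant(DL, VT));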
llvm::TargetLoweringBase::setPrefFunctionAlignment
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
Definition: TargetLowering.h:2294
llvm::MachineFrameInfo::computeMaxCallFrameSize
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a call frame and the AdjustsStack property.
Definition: MachineFrameInfo.cpp:187
llvm::AArch64ISD::FADDV_PRED
@ FADDV_PRED
Definition: AArch64ISelLowering.h:313
llvm::AArch64ISD::CMGT
@ CMGT
Definition: AArch64ISelLowering.h:207
llvm::AtomicRMWInst::Min
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:756
llvm::DenormalMode
Represent subnormal handling kind for floating point instruction inputs and outputs.
Definition: FloatingPointMode.h:67
llvm::SystemZISD::XC
@ XC
Definition: SystemZISelLowering.h:130
llvm::AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
Definition: AArch64ISelLowering.cpp:17507
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::AArch64_AM::isAdvSIMDModImmType8
static bool isAdvSIMDModImmType8(uint64_t Imm)
Definition: AArch64AddressingModes.h:558
llvm::ISD::ATOMIC_CMP_SWAP
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1125
llvm::AArch64ISD::FADDA_PRED
@ FADDA_PRED
Definition: AArch64ISelLowering.h:312
MCRegisterInfo.h
AArch64PerfectShuffle.h
llvm::ISD::UADDSAT
@ UADDSAT
Definition: ISDOpcodes.h:328
llvm::concatenateVectors
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition: VectorUtils.cpp:852
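A usage sketch (B is an IRBuilder<>, Lo and Hi are two <4 x i32> values; all assumed in scope):

    llvm::Value *Cat = llvm::concatenateVectors(B, {Lo, Hi});  // <8 x i32>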
llvm::TargetStackID::ScalableVector
@ ScalableVector
Definition: TargetFrameLowering.h:30
llvm::ISD::ATOMIC_LOAD_ADD
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1140
llvm::APInt::sextOrSelf
APInt sextOrSelf(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:980
llvm::ISD::SSUBSAT
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:336
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:476
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2278
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:631
llvm::AArch64ISD::BICi
@ BICi
Definition: AArch64ISelLowering.h:168
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1599
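For example (illustrative):

    llvm::SmallVector<int, 4> Vals = {1, 2, 3};
    assert(llvm::is_contained(Vals, 2));
    assert(!llvm::is_contained(Vals, 5));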
llvm::SDNode::dump
void dump() const
Dump this node, for debugging.
Definition: SelectionDAGDumper.cpp:539
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:414
llvm::SelectionDAG::getSExtOrTrunc
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
Definition: SelectionDAG.cpp:1269
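Typical use when normalizing an operand's width (a sketch; DAG, DL, and Op assumed in scope):

    SDValue Op64 = DAG.getSExtOrTrunc(Op, DL, MVT::i64);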
llvm::AArch64II::MO_NO_FLAG
@ MO_NO_FLAG
Definition: AArch64BaseInfo.h:616
llvm::MemSDNode::getOriginalAlign
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
Definition: SelectionDAGNodes.h:1263
llvm::TargetLowering::scalarizeVectorStore
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:7411
ArrayRef.h
llvm::SDValue::getScalarValueSizeInBits
uint64_t getScalarValueSizeInBits() const
Definition: SelectionDAGNodes.h:196
llvm::SelectionDAG::getAnyExtOrTrunc
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
Definition: SelectionDAG.cpp:1263
llvm::AArch64ISD::LD1RO_MERGE_ZERO
@ LD1RO_MERGE_ZERO
Definition: AArch64ISelLowering.h:344
llvm::EVT::getVectorMinNumElements
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:332
selectGatherScatterAddrMode
void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT, unsigned &Opcode, bool IsGather, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:4270
llvm::AArch64ISD::SMULL
@ SMULL
Definition: AArch64ISelLowering.h:291
OP_VTRNR
@ OP_VTRNR
Definition: ARMISelLowering.cpp:8120
GenericSetCCInfo
Helper structure to keep track of ISD::SET_CC operands.
Definition: AArch64ISelLowering.cpp:13691
getDUPLANEOp
static unsigned getDUPLANEOp(EVT EltType)
Definition: AArch64ISelLowering.cpp:9271
llvm::AArch64TargetLowering::isShuffleMaskLegal
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
Definition: AArch64ISelLowering.cpp:10606
llvm::GlobalAddressSDNode::getOffset
int64_t getOffset() const
Definition: SelectionDAGNodes.h:1713
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:157
convertMergedOpToPredOp
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc, SelectionDAG &DAG, bool UnpredOp=false)
Definition: AArch64ISelLowering.cpp:14311
llvm::shuffle
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1361
llvm::isUInt< 32 >
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:411
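Being constexpr, it can feed static_assert (illustrative):

    static_assert(llvm::isUInt<32>(0xFFFFFFFFull), "max u32 fits");
    static_assert(!llvm::isUInt<32>(0x100000000ull), "2^32 does not");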
llvm::TargetMachine::getTLSModel
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
Definition: TargetMachine.cpp:170
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:131
llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition: MachineValueType.h:850
llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition: TargetCallingConv.h:195
llvm::ISD::MSTORE
@ MSTORE
Definition: ISDOpcodes.h:1161
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2291
llvm::MachineFrameInfo::setHasTailCall
void setHasTailCall(bool V=true)
Definition: MachineFrameInfo.h:605
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1335
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:172
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:44
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:115
llvm::AArch64ISD::FCMGE
@ FCMGE
Definition: AArch64ISelLowering.h:211
llvm::ISD::SETOGT
@ SETOGT
Definition: ISDOpcodes.h:1359
llvm::AArch64TargetLowering::getOptimalMemOpLLT
LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &FuncAttributes) const override
LLT returning variant.
Definition: AArch64ISelLowering.cpp:12007
combineSVEReductionFP
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:14246
llvm::TargetLowering::CallLoweringInfo
This structure contains all information that is necessary for lowering calls.
Definition: TargetLowering.h:3728
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SelectionDAG::getVectorShuffle
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
Definition: SelectionDAG.cpp:1777
tryCombineFixedPointConvert
static SDValue tryCombineFixedPointConvert(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:13582
llvm::SelectionDAG::getMaskedLoad
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
Definition: SelectionDAG.cpp:7621
llvm::AArch64_AM::encodeAdvSIMDModImmType2
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
Definition: AArch64AddressingModes.h:471
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::PredicateConstraint
Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op is the value the constraint a...
Definition: PredicateInfo.h:75
llvm::AArch64ISD::SQSHLU_I
@ SQSHLU_I
Definition: AArch64ISelLowering.h:196
llvm::SelectionDAG::getAtomic
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
Definition: SelectionDAG.cpp:7153
llvm::APInt::sge
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
Definition: APInt.h:1329
llvm::ISD::MULHS
@ MULHS
Definition: ISDOpcodes.h:615
llvm::MachineFrameInfo::CreateFixedObject
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
Definition: MachineFrameInfo.cpp:83
llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition: FunctionLoweringInfo.h:53
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
getTestBitOperand
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15792
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:860
llvm::AArch64ISD::EXTR
@ EXTR
Definition: AArch64ISelLowering.h:149
llvm::TargetLoweringBase::EnableExtLdPromotion
bool EnableExtLdPromotion
Definition: TargetLowering.h:3150
isVShiftRImm
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
Definition: AArch64ISelLowering.cpp:10682
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:638
llvm::AArch64TargetLowering::insertCopiesSplitCSR
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
Definition: AArch64ISelLowering.cpp:17683
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1369
llvm::ISD::DEBUGTRAP
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1091
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::AtomicCmpXchgInst::getCompareOperand
Value * getCompareOperand()
Definition: Instructions.h:655
changeVectorFPCCToAArch64CC
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC usable with the vector...
Definition: AArch64ISelLowering.cpp:2422
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1557
llvm::AtomicRMWInst::isFloatingPointOperation
bool isFloatingPointOperation() const
Definition: Instructions.h:877
llvm::AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO
@ GLDFF1_SXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:375
llvm::TargetLowering::expandShiftParts
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Definition: TargetLowering.cpp:6632
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::MVT::all_valuetypes
static auto all_valuetypes()
SimpleValueType Iteration.
Definition: MachineValueType.h:1407
llvm::MaskedLoadSDNode
This class is used to represent an MLOAD node.
Definition: SelectionDAGNodes.h:2364
LCALLNAME5
#define LCALLNAME5(A, B)
llvm::MVT::nxv4i32
@ nxv4i32
Definition: MachineValueType.h:213
llvm::TargetOptions::TLSSize
unsigned TLSSize
Bit size of immediate TLS offsets (0 == use the default).
Definition: TargetOptions.h:259
llvm::TargetLoweringBase::AtomicExpansionKind::LLSC
@ LLSC
llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition: GetElementPtrTypeIterator.h:72
llvm::AArch64ISD::LDFF1S_MERGE_ZERO
@ LDFF1S_MERGE_ZERO
Definition: AArch64ISelLowering.h:342
llvm::AArch64ISD::SRSHR_I
@ SRSHR_I
Definition: AArch64ISelLowering.h:197
llvm::AArch64ISD::SMIN_PRED
@ SMIN_PRED
Definition: AArch64ISelLowering.h:96
llvm::AArch64ISD::FNEG_MERGE_PASSTHRU
@ FNEG_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:113
llvm::operator==
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:2030
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::GlobalValue::isThreadLocal
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:244
llvm::MachineMemOperand::MONonTemporal
@ MONonTemporal
The memory access is non-temporal.
Definition: MachineMemOperand.h:141
llvm::AArch64ISD::GLD1_SXTW_MERGE_ZERO
@ GLD1_SXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:355
llvm::AArch64ISD::GLDFF1S_IMM_MERGE_ZERO
@ GLDFF1S_IMM_MERGE_ZERO
Definition: AArch64ISelLowering.h:385
canGuaranteeTCO
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
Definition: AArch64ISelLowering.cpp:5527
llvm::AArch64TargetLowering::isLegalInterleavedAccessType
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
Definition: AArch64ISelLowering.cpp:11655
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1355
llvm::AArch64TargetLowering::isOffsetFoldingLegal
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Definition: AArch64ISelLowering.cpp:7947
llvm::AArch64FunctionInfo::getVarArgsGPRSize
unsigned getVarArgsGPRSize() const
Definition: AArch64MachineFunctionInfo.h:313
llvm::AArch64Subtarget::isCallingConvWin64
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: AArch64Subtarget.h:588
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
llvm::AArch64TargetLowering::isFPImmLegal
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Definition: AArch64ISelLowering.cpp:7954
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:8585
performNVCASTCombine
static SDValue performNVCASTCombine(SDNode *N)
Get rid of unnecessary NVCASTs (that don't change the type).
Definition: AArch64ISelLowering.cpp:16022
replaceSplatVectorStore
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of a scalar to a vector store by scalar stores of the scalar value.
Definition: AArch64ISelLowering.cpp:14868
llvm::ShuffleVectorInst::isZeroEltSplat
bool isZeroEltSplat() const
Return true if all elements of this shuffle are the same value as the first element of exactly one so...
Definition: Instructions.h:2227
tryAdvSIMDModImm32
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
Definition: AArch64ISelLowering.cpp:9653
llvm::is_splat
bool is_splat(R &&Range)
Wrapper function around std::equal to detect if all elements in a container are the same.
Definition: STLExtras.h:1701
LCALLNAME4
#define LCALLNAME4(A, B)
llvm::ISD::VECREDUCE_AND
@ VECREDUCE_AND
Definition: ISDOpcodes.h:1235
llvm::SelectionDAG::getBitcast
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
Definition: SelectionDAG.cpp:2063
llvm::MVT::getFloatingPointVT
static MVT getFloatingPointVT(unsigned BitWidth)
Definition: MachineValueType.h:1141
llvm::AArch64CC::LAST_ACTIVE
@ LAST_ACTIVE
Definition: AArch64BaseInfo.h:277
isEquivalentMaskless
static bool isEquivalentMaskless(unsigned CC, unsigned width, ISD::LoadExtType ExtType, int AddConstant, int CompConstant)
Definition: AArch64ISelLowering.cpp:15515
foldTruncStoreOfExt
static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N)
Definition: AArch64ISelLowering.cpp:15231
llvm::ISD::RETURNADDR
@ RETURNADDR
Definition: ISDOpcodes.h:95
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::AArch64ISD::FSQRT_MERGE_PASSTHRU
@ FSQRT_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:118
llvm::FastISel
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:65
llvm::AArch64ISD::GLDFF1_SXTW_MERGE_ZERO
@ GLDFF1_SXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:373
llvm::MVT::nxv4bf16
@ nxv4bf16
Definition: MachineValueType.h:237
llvm::AArch64FunctionInfo::getVarArgsFPRSize
unsigned getVarArgsFPRSize() const
Definition: AArch64MachineFunctionInfo.h:319
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
MatchRegisterName
static unsigned MatchRegisterName(StringRef Name)
llvm::SelectionDAG::setNodeMemRefs
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition: SelectionDAG.cpp:8353
llvm::AArch64Subtarget::hasNEON
bool hasNEON() const
Definition: AArch64Subtarget.h:373
llvm::ISD::POST_DEC
@ POST_DEC
Definition: ISDOpcodes.h:1304
llvm::EVT::widenIntegerVectorElementType
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition: ValueTypes.h:406
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2312
tryAdvSIMDModImm321s
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
Definition: AArch64ISelLowering.cpp:9741
llvm::LinearPolySize::getKnownMinValue
ScalarTy getKnownMinValue() const
Returns the minimum value this size can represent.
Definition: TypeSize.h:297
llvm::AArch64ISD::BSP
@ BSP
Definition: AArch64ISelLowering.h:173
llvm::AArch64ISD::ADCS
@ ADCS
Definition: AArch64ISelLowering.h:136
llvm::AArch64_AM::encodeAdvSIMDModImmType12
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
Definition: AArch64AddressingModes.h:711
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition: ISDOpcodes.h:716
llvm::AArch64ISD::FIRST_NUMBER
@ FIRST_NUMBER
Definition: AArch64ISelLowering.h:50
llvm::ISD::VASTART
@ VASTART
Definition: ISDOpcodes.h:1042
isWideDUPMask
static bool isWideDUPMask(ArrayRef< int > M, EVT VT, unsigned BlockSize, unsigned &DupLaneOp)
Check if a vector shuffle corresponds to a DUP instruction with a larger element width than the vect...
Definition: AArch64ISelLowering.cpp:8780
performGatherLoadCombine
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
Definition: AArch64ISelLowering.cpp:16233
llvm::MachinePointerInfo::getWithOffset
MachinePointerInfo getWithOffset(int64_t O) const
Definition: MachineMemOperand.h:80
llvm::AArch64ISD::LDNF1_MERGE_ZERO
@ LDNF1_MERGE_ZERO
Definition: AArch64ISelLowering.h:339
llvm::AArch64TargetLowering::mergeStoresAfterLegalization
bool mergeStoresAfterLegalization(EVT VT) const override
SVE code generation for fixed length vectors does not custom lower BUILD_VECTOR.
Definition: AArch64ISelLowering.cpp:4982
llvm::MVT::bf16
@ bf16
Definition: MachineValueType.h:53
llvm::SelectionDAG::getGlobalAddress
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
Definition: SelectionDAG.cpp:1545
llvm::AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO
@ GLD1_UXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:356
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::TargetLowering::CW_Default
@ CW_Default
Definition: TargetLowering.h:4160
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::MachineFunction
Definition: MachineFunction.h:230
MAKE_CASE
#define MAKE_CASE(V)
llvm::SelectionDAG::getCALLSEQ_END
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition: SelectionDAG.h:935
llvm::CCState::resultsCompatible
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
Definition: CallingConvLower.cpp:266
llvm::ISD::VSCALE
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1202
llvm::isAcquireOrStronger
bool isAcquireOrStronger(AtomicOrdering AO)
Definition: AtomicOrdering.h:128
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:653
Triple.h
llvm::AArch64ISD::LD1LANEpost
@ LD1LANEpost
Definition: AArch64ISelLowering.h:429
llvm::AArch64TargetLowering::enableAggressiveFMAFusion
bool enableAggressiveFMAFusion(EVT VT) const override
Enable aggressive FMA fusion on targets that want it.
Definition: AArch64ISelLowering.cpp:17741
llvm::MVT::getVectorVT
static MVT getVectorVT(MVT VT, unsigned NumElements)
Definition: MachineValueType.h:1177
llvm::MVT::fp_scalable_vector_valuetypes
static auto fp_scalable_vector_valuetypes()
Definition: MachineValueType.h:1450
tryCombineShiftImm
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:13989
llvm::CodeGenOpt::None
@ None
Definition: CodeGen.h:53
llvm::AArch64Subtarget::classifyGlobalFunctionReference
unsigned classifyGlobalFunctionReference(const GlobalValue *GV, const TargetMachine &TM) const
Definition: AArch64Subtarget.cpp:285
llvm::AArch64ISD::CLASTB_N
@ CLASTB_N
Definition: AArch64ISelLowering.h:306
llvm::CC_AArch64_Win64_CFGuard_Check
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::MVT::nxv2i32
@ nxv2i32
Definition: MachineValueType.h:212
LowerSVEIntrinsicEXT
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:14113
llvm::ISD::ConstantPool
@ ConstantPool
Definition: ISDOpcodes.h:82
llvm::AArch64ISD::ANDV_PRED
@ ANDV_PRED
Definition: AArch64ISelLowering.h:261
TargetOptions.h
performLDNT1Combine
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:14681
llvm::ISD::FMAXIMUM
@ FMAXIMUM
Definition: ISDOpcodes.h:912
llvm::ISD::GlobalTLSAddress
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
llvm::ISD::UBSANTRAP
@ UBSANTRAP
UBSANTRAP - Trap with an immediate describing the kind of sanitizer failure.
Definition: ISDOpcodes.h:1095
llvm::AArch64ISD::BITREVERSE_MERGE_PASSTHRU
@ BITREVERSE_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:323
llvm::AArch64_AM::encodeLogicalImmediate
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
Definition: AArch64AddressingModes.h:282
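An illustrative sketch (not from this file; the example constant is an assumption): only repeating rotated-run bitmasks are encodable, so callers validate with isLogicalImmediate first.

#include "MCTargetDesc/AArch64AddressingModes.h"
#include <cassert>
#include <cstdint>

// Encode a repeating 16-bit pattern (0x00FF) for a 64-bit logical instruction.
static uint64_t encodeExampleMask() {
  uint64_t Imm = 0x00FF00FF00FF00FFULL;
  assert(llvm::AArch64_AM::isLogicalImmediate(Imm, 64) && "not encodable");
  return llvm::AArch64_AM::encodeLogicalImmediate(Imm, 64);
}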
llvm::BuildVectorSDNode::isConstant
bool isConstant() const
Definition: SelectionDAG.cpp:10488
llvm::CallingConv::Win64
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:169
llvm::SelectionDAG::getTargetConstantPool
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:699
llvm::APFloat::dump
void dump() const
Definition: APFloat.cpp:4862
llvm::AArch64ISD::LDNF1S_MERGE_ZERO
@ LDNF1S_MERGE_ZERO
Definition: AArch64ISelLowering.h:340
isEXTMask
static bool isEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseEXT, unsigned &Imm)
Definition: AArch64ISelLowering.cpp:8853
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::AArch64CC::GE
@ GE
Definition: AArch64BaseInfo.h:265
llvm::AArch64ISD::STRICT_FCMPE
@ STRICT_FCMPE
Definition: AArch64ISelLowering.h:410
llvm::MVT::fixedlen_vector_valuetypes
static auto fixedlen_vector_valuetypes()
Definition: MachineValueType.h:1425
performTBZCombine
static SDValue performTBZCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15865
combineSVEReductionInt
static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:14227
llvm::AArch64Subtarget::hasCustomCallingConv
bool hasCustomCallingConv() const
Definition: AArch64Subtarget.h:371
llvm::ArrayRef< int >
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:155
llvm::ISD::isNormalLoad
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Definition: SelectionDAGNodes.h:2678
llvm::AArch64Subtarget::outlineAtomics
bool outlineAtomics() const
Definition: AArch64Subtarget.h:518
llvm::ISD::UMAX
@ UMAX
Definition: ISDOpcodes.h:629
llvm::AArch64ISD::LD4post
@ LD4post
Definition: AArch64ISelLowering.h:415
llvm::AArch64_AM::getFP64Imm
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
Definition: AArch64AddressingModes.h:421
llvm::ISD::PRE_INC
@ PRE_INC
Definition: ISDOpcodes.h:1304
llvm::AArch64Subtarget::isXRegisterReserved
bool isXRegisterReserved(size_t i) const
Definition: AArch64Subtarget.h:366
llvm::ConstantPoolSDNode
Definition: SelectionDAGNodes.h:1824
llvm::MachineFrameInfo::setStackID
void setStackID(int ObjectIdx, uint8_t ID)
Definition: MachineFrameInfo.h:702
llvm::SelectionDAG::getSelect
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:1075
llvm::APInt::getAllOnesValue
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:567
llvm::BlockAddressSDNode::getBlockAddress
const BlockAddress * getBlockAddress() const
Definition: SelectionDAGNodes.h:2127
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::AArch64ISD::CMEQ
@ CMEQ
Definition: AArch64ISelLowering.h:205
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1541
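Usage sketch (illustrative; the predicate and helper name are assumptions):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

// Does a shuffle mask contain any undef (negative) lane?
static bool hasUndefLane(llvm::ArrayRef<int> Mask) {
  return llvm::any_of(Mask, [](int Lane) { return Lane < 0; });
}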
llvm::BuildVectorSDNode::isConstantSplat
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
Definition: SelectionDAG.cpp:10288
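A sketch of the typical query pattern in DAG combines (illustrative, not code from this file):

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// If Op is a constant BUILD_VECTOR splat, return the splatted bits via SplatBits.
static bool getSplatBits(SDValue Op, APInt &SplatBits) {
  auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  if (!BVN)
    return false;
  APInt SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  return BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs) &&
         SplatBitSize <= 64; // the width limit is this caller's choice
}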
llvm::SDNode::use_end
static use_iterator use_end()
Definition: SelectionDAGNodes.h:779
DataLayout.h
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:47
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:212
llvm::AArch64TargetLowering::fallBackToDAGISel
bool fallBackToDAGISel(const Instruction &Inst) const override
Definition: AArch64ISelLowering.cpp:17786
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::countTrailingZeros
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: MathExtras.h:156
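Standalone sketch (illustrative): this is how power-of-two constants become shift amounts.

#include "llvm/Support/MathExtras.h"
#include <cassert>

static void countTrailingZerosExample() {
  assert(llvm::countTrailingZeros(8u) == 3); // 0b1000 -> 3 trailing zeros
  // With the default ZB_Width behavior, a zero input yields the full bit width.
  assert(llvm::countTrailingZeros(0u) == 32);
}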
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:101
llvm::AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO
@ GLDFF1_UXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:374
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::TargetLowering::LowerToTLSEmulatedModel
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
Definition: TargetLowering.cpp:7879
llvm::TargetLowering::LowerAsmOperandForConstraint
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Definition: TargetLowering.cpp:4509
llvm::SelectionDAG::getTargetInsertSubreg
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
Definition: SelectionDAG.cpp:8713
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:353
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:950
llvm::AArch64ISD::DUPLANE8
@ DUPLANE8
Definition: AArch64ISelLowering.h:153
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
isAddSubSExt
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3659
llvm::BuildVectorSDNode
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Definition: SelectionDAGNodes.h:1929
MBBI
MachineBasicBlock::iterator MBBI
Definition: AArch64SLSHardening.cpp:75
llvm::AArch64Subtarget::getTargetTriple
const Triple & getTargetTriple() const
Definition: AArch64Subtarget.h:327
llvm::APFloat::isPosZero
bool isPosZero() const
Definition: APFloat.h:1228
isMergePassthruOpcode
static bool isMergePassthruOpcode(unsigned Opc)
Definition: AArch64ISelLowering.cpp:201
llvm::ShuffleVectorSDNode::getMaskElt
int getMaskElt(unsigned Idx) const
Definition: SelectionDAGNodes.h:1501
llvm::ISD::ABDU
@ ABDU
Definition: ISDOpcodes.h:622
llvm::isReleaseOrStronger
bool isReleaseOrStronger(AtomicOrdering AO)
Definition: AtomicOrdering.h:132
llvm::Sched::Hybrid
@ Hybrid
Definition: TargetLowering.h:101
llvm::ISD::STRICT_SINT_TO_FP
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:435
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::ISD::SREM
@ SREM
Definition: ISDOpcodes.h:244
NormalizeBuildVector
static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:9991
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::ISD::LLRINT
@ LLRINT
Definition: ISDOpcodes.h:889
emitComparison
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:2487
llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition: ISDOpcodes.h:251
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1561
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:9173
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:155
llvm::ISD::ATOMIC_LOAD_SUB
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1141
llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2350
hasPairwiseAdd
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16)
Definition: AArch64ISelLowering.cpp:13399
llvm::AArch64TargetLowering::getScalarShiftAmountTy
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override
EVT is not used in-tree, but is used by out-of-tree targets.
Definition: AArch64ISelLowering.cpp:1826
uint32_t
SetCCInfo::Generic
GenericSetCCInfo Generic
Definition: AArch64ISelLowering.cpp:13705
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
llvm::AArch64ISD::MOVI
@ MOVI
Definition: AArch64ISelLowering.h:159
llvm::AArch64ISD::ADDS
@ ADDS
Definition: AArch64ISelLowering.h:134
Compiler.h
llvm::AArch64FunctionInfo::setVarArgsGPRIndex
void setVarArgsGPRIndex(int Index)
Definition: AArch64MachineFunctionInfo.h:311
llvm::AArch64ISD::ADR
@ ADR
Definition: AArch64ISelLowering.h:62
llvm::AArch64FunctionInfo::getForwardedMustTailRegParms
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
Definition: AArch64MachineFunctionInfo.h:371
llvm::MVT::is128BitVector
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: MachineValueType.h:407
llvm::ISD::ArgFlagsTy
Definition: TargetCallingConv.h:27
llvm::AArch64ISD::REV64
@ REV64
Definition: AArch64ISelLowering.h:184
llvm::TargetLoweringBase::MaxStoresPerMemmoveOptSize
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
Definition: TargetLowering.h:3143
llvm::MaskedGatherScatterSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2461
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1121
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:95
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::VTSDNode
This class is used to represent EVT's, which are used to parameterize some operations.
Definition: SelectionDAGNodes.h:2212
llvm::AArch64ISD::CMGTz
@ CMGTz
Definition: AArch64ISelLowering.h:217
llvm::TargetLowering::DAGCombinerInfo::isCalledByLegalizer
bool isCalledByLegalizer() const
Definition: TargetLowering.h:3540
llvm::ConstantSDNode::getSExtValue
int64_t getSExtValue() const
Definition: SelectionDAGNodes.h:1558
llvm::AArch64Subtarget::hasPAuth
bool hasPAuth() const
Definition: AArch64Subtarget.h:528
performGLD1Combine
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15027
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:877
llvm::SDValue::hasOneUse
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
Definition: SelectionDAGNodes.h:1157
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::AArch64ISD::SSTNT1_PRED
@ SSTNT1_PRED
Definition: AArch64ISelLowering.h:405
llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
llvm::AArch64RegisterInfo
Definition: AArch64RegisterInfo.h:26
llvm::ISD::BUILTIN_OP_END
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1249
llvm::AArch64ISD::LD1DUPpost
@ LD1DUPpost
Definition: AArch64ISelLowering.h:425
llvm::TargetLowering::getRegForInlineAsmConstraint
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
Definition: TargetLowering.cpp:4588
SetCCInfoAndKind
Helper structure to be able to read SetCC information.
Definition: AArch64ISelLowering.cpp:13712
llvm::AArch64ISD::FMA_PRED
@ FMA_PRED
Definition: AArch64ISelLowering.h:83
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition: TargetLowering.h:2251
llvm::AArch64ISD::LS64_EXTRACT
@ LS64_EXTRACT
Definition: AArch64ISelLowering.h:335
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition: TargetLowering.h:2205
llvm::ISD::getSetCCInverse
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition: SelectionDAG.cpp:477
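Sketch (illustrative; the wrapper is invented): invert a condition once rather than switching over every condition code by hand.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// For an integer VT, SETEQ becomes SETNE, SETLT becomes SETGE, and so on.
static ISD::CondCode invertedCond(ISD::CondCode CC, EVT VT) {
  return ISD::getSetCCInverse(CC, VT);
}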
llvm::AArch64TargetLowering::getTargetNodeName
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Definition: AArch64ISelLowering.cpp:1887
llvm::SDValue::getSimpleValueType
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:183
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:103
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:878
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
GeneratePerfectShuffle
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
Definition: AArch64ISelLowering.cpp:9106
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::CCState::CheckReturn
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
Definition: CallingConvLower.cpp:104
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:131
llvm::MCRegisterInfo
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Definition: MCRegisterInfo.h:135
llvm::AArch64ISD::SVE_LD2_MERGE_ZERO
@ SVE_LD2_MERGE_ZERO
Definition: AArch64ISelLowering.h:347
llvm::CallingConv::GHC
@ GHC
Definition: CallingConv.h:51
llvm::AArch64ISD::CBNZ
@ CBNZ
Definition: AArch64ISelLowering.h:268
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:281
performST1Combine
static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:14735
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:127
llvm::CCValAssign::FPExt
@ FPExt
Definition: CallingConvLower.h:51
llvm::MVT::is64BitVector
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: MachineValueType.h:398
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::ISD::VECREDUCE_XOR
@ VECREDUCE_XOR
Definition: ISDOpcodes.h:1237
llvm::CodeModel::Tiny
@ Tiny
Definition: CodeGen.h:28
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:379
llvm::AArch64CC::EQ
@ EQ
Definition: AArch64BaseInfo.h:255
llvm::AArch64TargetLowering::emitStoreConditional
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
Definition: AArch64ISelLowering.cpp:17513
llvm::MachineMemOperand::MOVolatile
@ MOVolatile
The memory access is volatile.
Definition: MachineMemOperand.h:139
llvm::ISD::ATOMIC_SWAP
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1139
llvm::AArch64ISD::ORV_PRED
@ ORV_PRED
Definition: AArch64ISelLowering.h:259
llvm::ISD::VECTOR_SPLICE
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:575
llvm::MaskedGatherScatterSDNode::getIndexType
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
Definition: SelectionDAGNodes.h:2442
AArch64ISelLowering.h
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1335
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:790
llvm::IRBuilderBase::CreateConstGEP1_32
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1857
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1527
llvm::AtomicRMWInst
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:726
llvm::MachinePointerInfo::getAddrSpace
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
Definition: MachineOperand.cpp:978
OP_VEXT3
@ OP_VEXT3
Definition: ARMISelLowering.cpp:8114
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:135
MRI
const MachineRegisterInfo *MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::AArch64ISD::UZP2
@ UZP2
Definition: AArch64ISelLowering.h:179
llvm::TargetLowering::C_RegisterClass
@ C_RegisterClass
Definition: TargetLowering.h:4140
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::ISD::XOR
@ XOR
Definition: ISDOpcodes.h:634
llvm::TargetLoweringBase::ArgListTy
std::vector< ArgListEntry > ArgListTy
Definition: TargetLowering.h:303
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:367
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2282
llvm::SelectionDAG::getTargetJumpTable
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:693
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:183
llvm::UnivariateLinearPolyBase::getValue
ScalarTy getValue() const
Definition: TypeSize.h:232
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1561
combineAcrossLanesIntrinsic
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:14073
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::AArch64TargetLowering::shouldConvertConstantLoadToIntImm
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
Definition: AArch64ISelLowering.cpp:12205
llvm::LinearPolySize::isKnownEven
bool isKnownEven() const
A return value of true indicates we know at compile time that the number of elements (vscale * Min) i...
Definition: TypeSize.h:303
MVT_CC
static const MVT MVT_CC
Value type used for condition codes.
Definition: AArch64ISelLowering.cpp:128
llvm::ISD::FRAMEADDR
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
llvm::AArch64ISD::PTRUE
@ PTRUE
Definition: AArch64ISelLowering.h:321
performVecReduceAddCombine
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *ST)
Definition: AArch64ISelLowering.cpp:12356
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:108
llvm::RetCC_AArch64_WebKit_JS
bool RetCC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::AtomicOrdering::Release
@ Release
llvm::AtomicSDNode
This is an SDNode representing atomic operations.
Definition: SelectionDAGNodes.h:1404
ObjCARCUtil.h
This file defines ARC utility functions which are used by various parts of the compiler.
llvm::Triple::isOSWindows
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:547
llvm::MVT::v1i32
@ v1i32
Definition: MachineValueType.h:100
llvm::AArch64ISD::ANDS
@ ANDS
Definition: AArch64ISelLowering.h:138
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:868
llvm::EVT::getEVTString
std::string getEVTString() const
This function returns the value type as a string, e.g. "i32".
Definition: ValueTypes.cpp:151
llvm::AArch64ISD::FMAXV_PRED
@ FMAXV_PRED
Definition: AArch64ISelLowering.h:314
llvm::AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU
@ FROUNDEVEN_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:117
llvm::ISD::INSERT_SUBVECTOR
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:535
llvm::HexagonISD::CP
@ CP
Definition: HexagonISelLowering.h:53
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::AArch64_AM::encodeAdvSIMDModImmType5
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
Definition: AArch64AddressingModes.h:517
llvm::AArch64Subtarget::getRegisterInfo
const AArch64RegisterInfo * getRegisterInfo() const override
Definition: AArch64Subtarget.h:319
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:338
Attributes.h
This file contains the simple types necessary to represent the attributes associated with functions a...
llvm::AArch64Subtarget::hasPerfMon
bool hasPerfMon() const
Definition: AArch64Subtarget.h:456
llvm::TargetLoweringBase::getSSPStackGuardCheck
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Definition: TargetLoweringBase.cpp:1995
llvm::EVT::getHalfNumVectorElementsVT
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:415
getExtensionTo64Bits
static EVT getExtensionTo64Bits(const EVT &OrigVT)
Definition: AArch64ISelLowering.cpp:3562
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1378
llvm::MVT::nxv16i1
@ nxv16i1
Definition: MachineValueType.h:192
GenerateTBL
static SDValue GenerateTBL(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:9209
llvm::MVT::v8bf16
@ v8bf16
Definition: MachineValueType.h:148
llvm::AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO
@ GLD1S_SXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:366
llvm::CCState::AnalyzeCallResult
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
Definition: CallingConvLower.cpp:167
llvm::MVT::nxv2i16
@ nxv2i16
Definition: MachineValueType.h:205
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: SelectionDAG.cpp:9537
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2351
llvm::CCState::getFirstUnallocated
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
Definition: CallingConvLower.h:336
llvm::CCValAssign::isMemLoc
bool isMemLoc() const
Definition: CallingConvLower.h:146
llvm::AArch64ISD::FMAXNM_PRED
@ FMAXNM_PRED
Definition: AArch64ISelLowering.h:84
llvm::TargetLoweringBase::IntrinsicInfo
Definition: TargetLowering.h:987
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::AArch64TargetLowering::isLegalAddImmediate
bool isLegalAddImmediate(int64_t) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
Definition: AArch64ISelLowering.cpp:12039
llvm::AArch64Subtarget::useRSqrt
bool useRSqrt() const
Definition: AArch64Subtarget.h:422
llvm::ISD::ArgFlagsTy::getByValSize
unsigned getByValSize() const
Definition: TargetCallingConv.h:169
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:899
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:204
emitStrictFPComparison
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, bool IsSignaling)
Definition: AArch64ISelLowering.cpp:2476
llvm::AArch64ISD::THREAD_POINTER
@ THREAD_POINTER
Definition: AArch64ISelLowering.h:75
llvm::AArch64_AM::encodeAdvSIMDModImmType10
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
Definition: AArch64AddressingModes.h:613
std
Definition: BitVector.h:838
performExtendCombine
static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:14564
llvm::AArch64CC::VS
@ VS
Definition: AArch64BaseInfo.h:261
llvm::SelectionDAG::computeKnownBits
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition: SelectionDAG.cpp:2732
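A sketch of how a combine typically consults it (illustrative; the helper is invented):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Prove the low 8 bits of Op are zero, e.g. before narrowing a mask.
static bool lowByteKnownZero(SelectionDAG &DAG, SDValue Op) {
  KnownBits Known = DAG.computeKnownBits(Op);
  return Known.countMinTrailingZeros() >= 8;
}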
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition: TargetLowering.h:2193
llvm::GlobalAddressSDNode
Definition: SelectionDAGNodes.h:1700
llvm::KnownBits
Definition: KnownBits.h:23
performAddSubCombine
static SDValue performAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:13938
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:588
performSETCCCombine
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15738
llvm::AArch64ISD::FRSQRTS
@ FRSQRTS
Definition: AArch64ISelLowering.h:298
llvm::ISD::EXTRACT_SUBVECTOR
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:549
llvm::AArch64Subtarget::ClassifyGlobalReference
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
Definition: AArch64Subtarget.cpp:253
mayTailCallThisCC
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
Definition: AArch64ISelLowering.cpp:5533
llvm::ISD::FP_TO_SINT_SAT
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:800
llvm::SDNode::getNumOperands
unsigned getNumOperands() const
Return the number of values used by this operation.
Definition: SelectionDAGNodes.h:883
llvm::TargetLoweringBase::setIndexedLoadAction
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Definition: TargetLowering.h:2216
llvm::TargetLoweringBase::AtomicExpansionKind
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
Definition: TargetLowering.h:249
uint16_t
llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition: TargetLowering.h:2353
CallingConvLower.h
llvm::AArch64TargetLowering::isZExtFree
bool isZExtFree(Type *Ty1, Type *Ty2) const override
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition: AArch64ISelLowering.cpp:11377
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:618
performAddSubLongCombine
static SDValue performAddSubLongCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:13896
llvm::AArch64_AM::encodeAdvSIMDModImmType4
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
Definition: AArch64AddressingModes.h:501
llvm::MaskedLoadStoreSDNode::getAddressingMode
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
Definition: SelectionDAGNodes.h:2347
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition: SelectionDAG.cpp:9527
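Usage sketch (illustrative; the helper name is invented): together with isAllOnesConstant above, this keeps combine guards short.

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True for a constant 0 or constant -1 (all bits set) operand.
static bool isZeroOrAllOnes(SDValue V) {
  return isNullConstant(V) || isAllOnesConstant(V);
}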
convertFromScalableVector
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
Definition: AArch64ISelLowering.cpp:17923
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:296
llvm::AArch64ISD::CSNEG
@ CSNEG
Definition: AArch64ISelLowering.h:70
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::AArch64ISD::STZG
@ STZG
Definition: AArch64ISelLowering.h:438
llvm::AArch64ISD::UDOT
@ UDOT
Definition: AArch64ISelLowering.h:243
llvm::ISD::BR
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:937
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1339
llvm::AArch64_IMM::expandMOVImm
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
Definition: AArch64ExpandImm.cpp:304
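Sketch (illustrative; relies on the ImmInsnModel struct declared in AArch64ExpandImm.h): ask the expander how many real move-immediate instructions a constant costs.

#include "AArch64ExpandImm.h"
#include "llvm/ADT/SmallVector.h"
#include <cstdint>

static unsigned movImmCost(uint64_t Imm) {
  llvm::SmallVector<llvm::AArch64_IMM::ImmInsnModel, 4> Insn;
  llvm::AArch64_IMM::expandMOVImm(Imm, /*BitSize=*/64, Insn);
  return Insn.size(); // one entry per MOVZ/MOVN/MOVK (or ORR) emitted
}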
llvm::BuildVectorSDNode::getConstantSplatNode
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
Definition: SelectionDAG.cpp:10447
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::AArch64TargetLowering::getVaListSizeInBits
unsigned getVaListSizeInBits(const DataLayout &DL) const override
Returns the size of the platform's va_list object.
Definition: AArch64ISelLowering.cpp:17746
llvm::MemSDNode::getAlignment
unsigned getAlignment() const
Definition: SelectionDAGNodes.h:1266
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:871
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:516
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:879
llvm::ISD::getSetCCSwappedOperands
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition: SelectionDAG.cpp:454
llvm::AArch64ISD::FMOV
@ FMOV
Definition: AArch64ISelLowering.h:163
llvm::TargetLowering::ConstraintWeight
ConstraintWeight
Definition: TargetLowering.h:4147
llvm::AArch64Subtarget::hasFullFP16
bool hasFullFP16() const
Definition: AArch64Subtarget.h:457
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:870
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:440
performScatterStoreCombine
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
Definition: AArch64ISelLowering.cpp:16129
llvm::ISD::OutputArg::VT
MVT VT
Definition: TargetCallingConv.h:235
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:92
llvm::LLT::getSizeInBytes
TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelTypeImpl.h:163
llvm::AArch64ISD::UADDV_PRED
@ UADDV_PRED
Definition: AArch64ISelLowering.h:254
llvm::IRBuilderBase::GetInsertBlock
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:178
Enabled
static bool Enabled
Definition: Statistic.cpp:46
llvm::AArch64TargetLowering::getPreferredVectorAction
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
Definition: AArch64ISelLowering.cpp:17379
llvm::TypeSize
Definition: TypeSize.h:417
llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:264
llvm::AArch64TargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: AArch64ISelLowering.cpp:2244
llvm::AArch64ISD::NEG_MERGE_PASSTHRU
@ NEG_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:129
llvm::AArch64ISD::SRHADD
@ SRHADD
Definition: AArch64ISelLowering.h:236
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:491
Casting.h
LowerSVEIntrinsicDUP
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:14099
llvm::AArch64ISD::CSINC
@ CSINC
Definition: AArch64ISelLowering.h:71
Function.h
isValidImmForSVEVecImmAddrMode
static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes, unsigned ScalarSizeInBytes)
Check if the value of OffsetInBytes can be used as an immediate for the gather load/prefetch and scat...
Definition: AArch64ISelLowering.cpp:16102
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:200
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:8703
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
llvm::AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO
@ GLDFF1S_SXTW_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:384
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
isAddSubZExt
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3670
llvm::AArch64ISD::TBL
@ TBL
Definition: AArch64ISelLowering.h:309
llvm::AArch64_AM::isAdvSIMDModImmType10
static bool isAdvSIMDModImmType10(uint64_t Imm)
Definition: AArch64AddressingModes.h:593
llvm::SelectionDAG::getTargetExternalSymbol
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.cpp:1728
llvm::LinearPolySize< ElementCount >::getScalable
static ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:287
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
llvm::CCState::getNextStackOffset
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
Definition: CallingConvLower.h:264
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::AArch64TargetLowering::initializeSplitCSR
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
Definition: AArch64ISelLowering.cpp:17677
llvm::TargetLoweringBase::LegalizeTypeAction
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
Definition: TargetLowering.h:205
llvm::ISD::VECREDUCE_OR
@ VECREDUCE_OR
Definition: ISDOpcodes.h:1236
skipExtensionForVectorMULL
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3622
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1365
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:738
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::AArch64ISD::GLD1S_SXTW_MERGE_ZERO
@ GLD1S_SXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:364
llvm::AArch64_AM::encodeAdvSIMDModImmType8
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
Definition: AArch64AddressingModes.h:568
addRequiredExtensionForVectorMULL
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
Definition: AArch64ISelLowering.cpp:3579
OP_VDUP1
@ OP_VDUP1
Definition: ARMISelLowering.cpp:8109
llvm::AArch64ISD::SUB_PRED
@ SUB_PRED
Definition: AArch64ISelLowering.h:99
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
llvm::ISD::SDIV
@ SDIV
Definition: ISDOpcodes.h:242
llvm::AArch64FunctionInfo::setJumpTableEntryInfo
void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym)
Definition: AArch64MachineFunctionInfo.h:331
llvm::AArch64TargetLowering::EmitF128CSEL
MachineBasicBlock * EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *BB) const
Definition: AArch64ISelLowering.cpp:2177
llvm::ISD::TargetConstant
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
llvm::MaskedGatherSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Definition: SelectionDAGNodes.h:2488
llvm::CallingConv::PreserveMost
@ PreserveMost
Definition: CallingConv.h:66
llvm::AArch64ISD::SVE_LD3_MERGE_ZERO
@ SVE_LD3_MERGE_ZERO
Definition: AArch64ISelLowering.h:348
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:176
llvm::AArch64ISD::FP_EXTEND_MERGE_PASSTHRU
@ FP_EXTEND_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:121
llvm::AArch64ISD::GLDFF1S_MERGE_ZERO
@ GLDFF1S_MERGE_ZERO
Definition: AArch64ISelLowering.h:379
llvm::TLSModel::Model
Model
Definition: CodeGen.h:42
llvm::TargetLoweringBase::ZeroOrOneBooleanContent
@ ZeroOrOneBooleanContent
Definition: TargetLowering.h:232
llvm::AArch64TargetLowering::getSafeStackPointerLocation
Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const override
If the target has a standard location for the unsafe stack pointer, returns the address of that locat...
Definition: AArch64ISelLowering.cpp:17628
llvm::SDNodeFlags
These are IR-level optimization flags that may be propagated to SDNodes.
Definition: SelectionDAGNodes.h:371
llvm::AArch64CC::getInvertedCondCode
static CondCode getInvertedCondCode(CondCode Code)
Definition: AArch64BaseInfo.h:303
llvm::TargetLowering::CW_Invalid
@ CW_Invalid
Definition: TargetLowering.h:4149
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::AArch64CC::GT
@ GT
Definition: AArch64BaseInfo.h:267
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:225
llvm::TargetLoweringBase::setLibcallName
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
Definition: TargetLowering.h:2835
OP_VUZPR
@ OP_VUZPR
Definition: ARMISelLowering.cpp:8116
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:922
llvm::ISD::VACOPY
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1037
llvm::AArch64ISD::ADRP
@ ADRP
Definition: AArch64ISelLowering.h:61
llvm::MVT::nxv8i8
@ nxv8i8
Definition: MachineValueType.h:199
llvm::AArch64Subtarget::hasLSE
bool hasLSE() const
Definition: AArch64Subtarget.h:377
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:107
llvm::Function::arg_begin
arg_iterator arg_begin()
Definition: Function.h:794
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:137
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition: ISDOpcodes.h:717
llvm::AArch64ISD::FCMEQz
@ FCMEQz
Definition: AArch64ISelLowering.h:220
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:740
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::ISD::SSUBO
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:314
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1149
llvm::AArch64_AM::encodeAdvSIMDModImmType11
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
Definition: AArch64AddressingModes.h:662
LowerXALUO
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3203
llvm::AArch64Subtarget::isTargetFuchsia
bool isTargetFuchsia() const
Definition: AArch64Subtarget.h:505
tryAdvSIMDModImmFP
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
Definition: AArch64ISelLowering.cpp:9793
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::MVT::nxv8f16
@ nxv8f16
Definition: MachineValueType.h:231
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:476
llvm::AArch64ISD::DUPLANE64
@ DUPLANE64
Definition: AArch64ISelLowering.h:156
llvm::MaskedLoadSDNode::getMask
const SDValue & getMask() const
Definition: SelectionDAGNodes.h:2382
isEssentiallyExtractHighSubvector
static bool isEssentiallyExtractHighSubvector(SDValue N)
Definition: AArch64ISelLowering.cpp:13679
llvm::MachineFunction::ArgRegPair::Reg
Register Reg
Definition: MachineFunction.h:411
llvm::FPOpFusion::Fast
@ Fast
Definition: TargetOptions.h:37
getCmpOperandFoldingProfit
static unsigned getCmpOperandFoldingProfit(SDValue Op)
Returns how profitable it is to fold a comparison's operand's shift and/or extension operations.
Definition: AArch64ISelLowering.cpp:2829
llvm::AArch64ISD::ST2G
@ ST2G
Definition: AArch64ISelLowering.h:439
CodeGen.h
llvm::AArch64ISD::SST1_SXTW_PRED
@ SST1_SXTW_PRED
Definition: AArch64ISelLowering.h:399
llvm::AArch64Subtarget::isTargetMachO
bool isTargetMachO() const
Definition: AArch64Subtarget.h:509
llvm::AArch64ISD::FSUB_PRED
@ FSUB_PRED
Definition: AArch64ISelLowering.h:89
llvm::TLSModel::InitialExec
@ InitialExec
Definition: CodeGen.h:45
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
llvm::AArch64CC::NV
@ NV
Definition: AArch64BaseInfo.h:270
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:301
llvm::SDNode::hasNUsesOfValue
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
Definition: SelectionDAG.cpp:9762
llvm::MemSDNode::getAlign
Align getAlign() const
Definition: SelectionDAGNodes.h:1264
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:186
llvm::SelectionDAG::GetSplitDestVTs
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
Definition: SelectionDAG.cpp:10186
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition: TargetLowering.h:2102
isSignExtended
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:3647
llvm::ISD::SETOGE
@ SETOGE
Definition: ISDOpcodes.h:1360
llvm::AArch64TargetLowering::CCAssignFnForReturn
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC) const
Selects the correct CCAssignFn for a given CallingConvention value.
Definition: AArch64ISelLowering.cpp:5071
llvm::ISD::FP_EXTEND
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:833
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:2011
llvm::TargetOptions::NoNaNsFPMath
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
Definition: TargetOptions.h:162
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
tryLowerToSLI
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:9863
llvm::AArch64ISD::REINTERPRET_CAST
@ REINTERPRET_CAST
Definition: AArch64ISelLowering.h:331
llvm::CombineLevel
CombineLevel
Definition: DAGCombine.h:15
llvm::AArch64ISD::FTRUNC_MERGE_PASSTHRU
@ FTRUNC_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:119
llvm::AArch64TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Definition: AArch64ISelLowering.cpp:17657
performTBISimplification
static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Simplify Addr given that the top byte of it is ignored by HW during address translation.
Definition: AArch64ISelLowering.cpp:15216
Instructions.h
Invalid
@ Invalid
Definition: AArch64ISelLowering.cpp:8145
llvm::AArch64ISD::REV16
@ REV16
Definition: AArch64ISelLowering.h:182
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:378
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
AArch64Subtarget.h
llvm::Pattern
Definition: FileCheckImpl.h:614
llvm::MVT::f128
@ f128
Definition: MachineValueType.h:58
llvm::ISD::PREFETCH
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1101
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:657
llvm::ISD::READCYCLECOUNTER
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1068
llvm::AArch64FunctionInfo::getSRetReturnReg
unsigned getSRetReturnReg() const
Definition: AArch64MachineFunctionInfo.h:322
llvm::CC_AArch64_DarwinPCS_VarArg
bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
SmallVector.h
llvm::MVT::v1i8
@ v1i8
Definition: MachineValueType.h:76
Upl
@ Upl
Definition: AArch64ISelLowering.cpp:8143
llvm::MVT::nxv8i1
@ nxv8i1
Definition: MachineValueType.h:191
llvm::ISD::FREM
@ FREM
Definition: ISDOpcodes.h:381
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:1003
SetCCInfo
Helper structure to keep track of SetCC information.
Definition: AArch64ISelLowering.cpp:13704
llvm::TargetLowering::parametersInCSRMatch
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
Definition: TargetLowering.cpp:80
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
MachineInstrBuilder.h
llvm::ConstantSDNode::isAllOnesValue
bool isAllOnesValue() const
Definition: SelectionDAGNodes.h:1567
llvm::ISD::isUnsignedIntSetCC
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1394
llvm::ISD::ArgFlagsTy::getNonZeroByValAlign
Align getNonZeroByValAlign() const
Definition: TargetCallingConv.h:153
llvm::AArch64ISD::LS64_BUILD
@ LS64_BUILD
Definition: AArch64ISelLowering.h:334
llvm::ISD::MUL
@ MUL
Definition: ISDOpcodes.h:241
llvm::ISD::UREM
@ UREM
Definition: ISDOpcodes.h:245
llvm::AArch64::RoundingBitsPos
const unsigned RoundingBitsPos
Definition: AArch64ISelLowering.h:478
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:198
llvm::AArch64ISD::GLD1_UXTW_MERGE_ZERO
@ GLD1_UXTW_MERGE_ZERO
Definition: AArch64ISelLowering.h:354
llvm::AArch64ISD::UADDV
@ UADDV
Definition: AArch64ISelLowering.h:229
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2282
llvm::CCValAssign::getValVT
MVT getValVT() const
Definition: CallingConvLower.h:143
llvm::MVT::f16
@ f16
Definition: MachineValueType.h:54
llvm::SDNode::hasPredecessorHelper
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
Definition: SelectionDAGNodes.h:824
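A typical call pattern (a sketch; N and Use stand for SDNode pointers already in hand):

  SmallPtrSet<const SDNode *, 16> Visited;
  SmallVector<const SDNode *, 8> Worklist;
  Worklist.push_back(Use);
  // True if N is reachable from Use by walking operand edges, i.e. N is
  // a (transitive) predecessor; the default MaxSteps = 0 means no cutoff.
  bool Reaches = SDNode::hasPredecessorHelper(N, Visited, Worklist);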
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::AArch64ISD::UMAXV_PRED
@ UMAXV_PRED
Definition: AArch64ISelLowering.h:256
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1338
llvm::AArch64Subtarget::getPrefLoopLogAlignment
unsigned getPrefLoopLogAlignment() const
Definition: AArch64Subtarget.h:442
llvm::TargetLowering::useLoadStackGuardNode
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
Definition: TargetLowering.h:4619
llvm::SDNodeFlags::setNoUnsignedWrap
void setNoUnsignedWrap(bool b)
Definition: SelectionDAGNodes.h:411
N
#define N
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition: TargetLoweringBase.cpp:1284
EnableCombineMGatherIntrinsics
static cl::opt< bool > EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, cl::desc("Combine extends of AArch64 masked " "gather intrinsics"), cl::init(true))
llvm::AArch64ISD::ABS_MERGE_PASSTHRU
@ ABS_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:128
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition: ISDOpcodes.h:670
llvm::ISD::LAST_INDEXED_MODE
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1306
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::AArch64ISD::URHADD
@ URHADD
Definition: AArch64ISelLowering.h:237
llvm::AArch64ISD::GLD1_SCALED_MERGE_ZERO
@ GLD1_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:353
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:659
llvm::ISD::ADDROFRETURNADDR
@ ADDROFRETURNADDR
ADDROFRETURNADDR - Represents the llvm.addressofreturnaddress intrinsic.
Definition: ISDOpcodes.h:101
llvm::AArch64ISD::ST2LANEpost
@ ST2LANEpost
Definition: AArch64ISelLowering.h:433
llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:123
llvm::AArch64ISD::UMAXV
@ UMAXV
Definition: AArch64ISelLowering.h:251
llvm::KnownBits::commonBits
static KnownBits commonBits(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits common to LHS and RHS.
Definition: KnownBits.h:289
llvm::SelectionDAG::getRegisterMask
SDValue getRegisterMask(const uint32_t *RegMask)
Definition: SelectionDAG.cpp:1975
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::ISD::ArgFlagsTy::isInConsecutiveRegs
bool isInConsecutiveRegs() const
Definition: TargetCallingConv.h:124
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::MVT::getVT
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:526
llvm::TargetLowering::DAGCombinerInfo::CombineTo
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
Definition: DAGCombiner.cpp:837
llvm::AArch64ISD::GLD1S_MERGE_ZERO
@ GLD1S_MERGE_ZERO
Definition: AArch64ISelLowering.h:361
llvm::ISD::CTTZ
@ CTTZ
Definition: ISDOpcodes.h:667
llvm::TargetLoweringBase::getRegClassFor
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
Definition: TargetLowering.h:851
llvm::tgtok::Bit
@ Bit
Definition: TGLexer.h:50
getPredicateForVector
static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
Definition: AArch64ISelLowering.cpp:17904
AArch64SetCCInfo::Cmp
const SDValue * Cmp
Definition: AArch64ISelLowering.cpp:13699
llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition: MachineFunction.cpp:260
llvm::AArch64Subtarget::requiresStrictAlign
bool requiresStrictAlign() const
Definition: AArch64Subtarget.h:358
llvm::ISD::SIGNED_UNSCALED
@ SIGNED_UNSCALED
Definition: ISDOpcodes.h:1318
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:164
llvm::SelectionDAG::getStepVector
SDValue getStepVector(const SDLoc &DL, EVT ResVT, APInt StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
Definition: SelectionDAG.cpp:1756
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
Definition: TargetLowering.h:2349
performSTNT1Combine
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:14765
llvm::TargetLowering::DAGCombinerInfo::CommitTargetLoweringOpt
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
Definition: DAGCombiner.cpp:857
llvm::EVT::isPow2VectorType
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:432
llvm::AArch64ISD::TBZ
@ TBZ
Definition: AArch64ISelLowering.h:269
llvm::AArch64RegisterInfo::UpdateCustomCalleeSavedRegs
void UpdateCustomCalleeSavedRegs(MachineFunction &MF) const
Definition: AArch64RegisterInfo.cpp:155
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::TargetLoweringBase::AtomicExpansionKind::None
@ None
llvm::MVT::changeVectorElementType
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: MachineValueType.h:471
llvm::TypeSize::getKnownMinSize
ScalarTy getKnownMinSize() const
Definition: TypeSize.h:427
llvm::ISD::UMIN
@ UMIN
Definition: ISDOpcodes.h:628
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
llvm::AArch64ISD::FCCMP
@ FCCMP
Definition: AArch64ISelLowering.h:143
llvm::MVT::Untyped
@ Untyped
Definition: MachineValueType.h:266
MachineMemOperand.h
llvm::AArch64TargetLowering::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
Definition: AArch64ISelLowering.cpp:11168
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:558
llvm::ConstantSDNode::isNullValue
bool isNullValue() const
Definition: SelectionDAGNodes.h:1566
llvm::ISD::MULHU
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:614
llvm::TargetOptions::UnsafeFPMath
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
Definition: TargetOptions.h:150
RegName
#define RegName(no)
llvm::AArch64CC::CondCode
CondCode
Definition: AArch64BaseInfo.h:254
llvm::CC_AArch64_Win64_VarArg
bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
llvm::MaskedScatterSDNode
This class is used to represent an MSCATTER node.
Definition: SelectionDAGNodes.h:2499
llvm::ISD::LROUND
@ LROUND
Definition: ISDOpcodes.h:886
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:714
llvm::MachineBasicBlock::transferSuccessorsAndUpdatePHIs
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
Definition: MachineBasicBlock.cpp:885
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1109
llvm::ShuffleVectorSDNode::isSplatMask
static bool isSplatMask(const int *Mask, EVT VT)
Definition: SelectionDAG.cpp:10497
tryCombineLongOpWithDup
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:13957
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
DerivedTypes.h
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:273
llvm::AArch64TargetLowering::shouldSinkOperands
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
Definition: AArch64ISelLowering.cpp:11538
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1363
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:81
llvm::TargetLoweringBase::Enabled
@ Enabled
Definition: TargetLowering.h:490
emitConjunctionRec
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
Definition: AArch64ISelLowering.cpp:2703
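At the C level, the kind of input this lowers and the rough shape of the result (a hand-written sketch, not compiler output; the exact condition codes and NZCV immediates depend on the tree):

  int BothHold(int a, int b) { return a > 0 && b != 5; }
  // cmp  w0, #0          // first compare sets the flags
  // ccmp w1, #5, #4, gt  // second compare runs only if 'gt' held,
  //                      // otherwise NZCV is forced to #4 (Z set)
  // cset w0, ne          // one conditional read of the whole chain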
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:257
llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:667
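For example:

  APInt Low8  = APInt::getLowBitsSet(32, 8);   // 32-bit value 0x000000FF
  APInt Low12 = APInt::getLowBitsSet(64, 12);  // 64-bit value 0xFFF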
isREVMask
static bool isREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
Definition: AArch64ISelLowering.cpp:8894
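The mask shapes it accepts, shown for a v8i8 shuffle (hand-written examples):

  int Rev16[] = {1, 0, 3, 2, 5, 4, 7, 6}; // bytes swapped per 16-bit block
  int Rev32[] = {3, 2, 1, 0, 7, 6, 5, 4}; // bytes reversed per 32-bit block
  int Rev64[] = {7, 6, 5, 4, 3, 2, 1, 0}; // bytes reversed per 64-bit block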
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:45
llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
llvm::AArch64_AM::encodeAdvSIMDModImmType9
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
Definition: AArch64AddressingModes.h:579
llvm::TargetLoweringBase::shouldReduceLoadWidth
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
Definition: TargetLowering.h:1549
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
llvm::ConstantFPSDNode::isZero
bool isZero() const
Return true if the value is positive or negative zero.
Definition: SelectionDAGNodes.h:1602
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:866
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:437
ReplaceReductionResults
static void ReplaceReductionResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, unsigned InterOp, unsigned AcrossOp)
Definition: AArch64ISelLowering.cpp:17085
llvm::AArch64II::MO_G2
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
Definition: AArch64BaseInfo.h:636
OP_VEXT2
@ OP_VEXT2
Definition: ARMISelLowering.cpp:8113
llvm::AArch64II::MO_COFFSTUB
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: AArch64BaseInfo.h:654
getPredicateForFixedLengthVector
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
Definition: AArch64ISelLowering.cpp:17828
llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
llvm::ISD::VAARG
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1032
llvm::AArch64FunctionInfo::setIsSplitCSR
void setIsSplitCSR(bool s)
Definition: AArch64MachineFunctionInfo.h:216
llvm::KnownBits::getBitWidth
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
performBRCONDCombine
static SDValue performBRCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:15665
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::SelectionDAG::getExternalSymbol
SDValue getExternalSymbol(const char *Sym, EVT VT)
Definition: SelectionDAG.cpp:1711
Analysis.h
llvm::AArch64ISD::BSWAP_MERGE_PASSTHRU
@ BSWAP_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:324
llvm::isMask_64
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:473
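Concretely (values chosen for illustration):

  assert(isMask_64(0xFF));    // 0...011111111: ones start at bit 0
  assert(!isMask_64(0xFF0));  // ones do not reach down to bit 0
  assert(!isMask_64(0));      // the sequence must be non-empty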
AArch64SetCCInfo::CC
AArch64CC::CondCode CC
Definition: AArch64ISelLowering.cpp:13700
llvm::AArch64TargetLowering::shouldExpandAtomicStoreInIR
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns true if the given (atomic) store should be expanded by the IR-level AtomicExpand pass into an...
Definition: AArch64ISelLowering.cpp:17392
llvm::AArch64ISD::INSR
@ INSR
Definition: AArch64ISelLowering.h:319
llvm::AArch64ISD::LD2LANEpost
@ LD2LANEpost
Definition: AArch64ISelLowering.h:430
llvm::TargetLoweringBase::MaxLoadsPerMemcmp
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
Definition: TargetLowering.h:3127
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition: MachineFrameInfo.h:367
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:62
performVectorCompareAndMaskUnaryOpCombine
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:12723
llvm::AArch64ISD::LASTA
@ LASTA
Definition: AArch64ISelLowering.h:307
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::AArch64TargetLowering::preferIncOfAddToSubOfNot
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
Definition: AArch64ISelLowering.cpp:17736
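The equivalence behind the hook, in plain C++ (holds for all unsigned x and y under wraparound arithmetic):

  uint32_t SubOfNot = y - ~x;       // sub y, (xor x, -1)
  uint32_t IncOfAdd = (x + 1) + y;  // add (add x, 1), y
  // ~x == -x - 1, so y - ~x == y + x + 1 == (x + 1) + y.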
tryFormConcatFromShuffle
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:9076
llvm::cl::desc
Definition: CommandLine.h:414
llvm::ISD::ATOMIC_LOAD_XOR
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1145
llvm::EVT::is128BitVector
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:186
llvm::MVT::fp_valuetypes
static auto fp_valuetypes()
Definition: MachineValueType.h:1416
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1408
llvm::M1
unsigned M1(unsigned Val)
Definition: VE.h:372
llvm::AArch64ISD::ST4post
@ ST4post
Definition: AArch64ISelLowering.h:418
llvm::AArch64ISD::ST1x3post
@ ST1x3post
Definition: AArch64ISelLowering.h:423
llvm::AArch64ISD::WrapperLarge
@ WrapperLarge
Definition: AArch64ISelLowering.h:51
llvm::ISD::VECREDUCE_SMIN
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1239
llvm::AArch64Subtarget
Definition: AArch64Subtarget.h:38
llvm::AtomicRMWInst::UMax
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:758
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:726
llvm::MaskedLoadSDNode::getOffset
const SDValue & getOffset() const
Definition: SelectionDAGNodes.h:2381
raw_ostream.h
llvm::AArch64TargetLowering::isLegalAddressingMode
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: AArch64ISelLowering.cpp:12062
llvm::AArch64TargetLowering
Definition: AArch64ISelLowering.h:484
llvm::AArch64TargetLowering::getPointerTy
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: AArch64ISelLowering.h:502
llvm::SelectionDAG::getMemcpy
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:6801
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:79
llvm::ISD::VECREDUCE_UMIN
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1241
llvm::createSequentialMask
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition: VectorUtils.cpp:814
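For example (undef lanes are encoded as -1 in shuffle masks):

  SmallVector<int, 16> M =
      createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/2);
  // M == {2, 3, 4, 5, -1, -1}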
llvm::MVT::nxv2i8
@ nxv2i8
Definition: MachineValueType.h:197
llvm::AArch64ISD::LD4DUPpost
@ LD4DUPpost
Definition: AArch64ISelLowering.h:428
llvm::MaskedGatherScatterSDNode::getIndex
const SDValue & getIndex() const
Definition: SelectionDAGNodes.h:2462
llvm::TargetLoweringBase::finalizeLowering
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
Definition: TargetLoweringBase.cpp:2207
llvm::StringRef::size
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:157
llvm::MVT::v8f32
@ v8f32
Definition: MachineValueType.h:161
MachineFunction.h
isUZPMask
static bool isUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Definition: AArch64ISelLowering.cpp:8937
performSignExtendInRegCombine
static SDValue performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:16335
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:880
llvm::AArch64ISD::BIT
@ BIT
Definition: AArch64ISelLowering.h:264
llvm::TargetLoweringBase::AtomicExpansionKind::CmpXChg
@ CmpXChg
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:496
llvm::SelectionDAG::getSplatValue
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
Definition: SelectionDAG.cpp:2637
llvm::AArch64ISD::BRCOND
@ BRCOND
Definition: AArch64ISelLowering.h:67
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:89
llvm::tgtok::TrueVal
@ TrueVal
Definition: TGLexer.h:61
llvm::AArch64ISD::SUNPKHI
@ SUNPKHI
Definition: AArch64ISelLowering.h:300
llvm::EVT::getDoubleNumVectorElementsVT
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:425
llvm::LLT::scalar
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:43
llvm::LinearPolySize::divideCoefficientBy
LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:361
parsePredicateConstraint
static PredicateConstraint parsePredicateConstraint(StringRef Constraint)
Definition: AArch64ISelLowering.cpp:8148
Value.h
llvm::AArch64TargetLowering::insertSSPDeclarations
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Definition: AArch64ISelLowering.cpp:17593
llvm::AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU
@ UINT_TO_FP_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:122
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1284
llvm::AArch64ISD::FNEARBYINT_MERGE_PASSTHRU
@ FNEARBYINT_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:112
llvm::AArch64Subtarget::hasAggressiveFMA
bool hasAggressiveFMA() const
Definition: AArch64Subtarget.h:464
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:500
changeFPCCToAArch64CC
static void changeFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
Definition: AArch64ISelLowering.cpp:2329
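Most FP predicates map to a single AArch64 condition; a few ordered/unordered combinations need a pair, with CondCode2 left as AL when unused. A sketch, assuming the SETONE mapping from the source:

  AArch64CC::CondCode CC1, CC2;
  changeFPCCToAArch64CC(ISD::SETONE, CC1, CC2);
  // SETONE has no single-condition encoding, so it splits into
  // "less than" OR "greater than": CC1 == MI, CC2 == GT.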
llvm::MachineMemOperand::MONone
@ MONone
Definition: MachineMemOperand.h:133
llvm::AArch64ISD::GLDFF1_SCALED_MERGE_ZERO
@ GLDFF1_SCALED_MERGE_ZERO
Definition: AArch64ISelLowering.h:371
performSVESpliceCombine
SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG)
Definition: AArch64ISelLowering.cpp:16578
llvm::AArch64Subtarget::hasBF16
bool hasBF16() const
Definition: AArch64Subtarget.h:487
llvm::AArch64TargetLowering::computeKnownBitsForTargetNode
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Definition: AArch64ISelLowering.cpp:1759
llvm::AArch64ISD::SMINV
@ SMINV
Definition: AArch64ISelLowering.h:248
llvm::ISD::STACKSAVE
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1008
llvm::TargetLoweringBase::MaxStoresPerMemmove
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
Definition: TargetLowering.h:3141
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:874
isINSMask
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
Definition: AArch64ISelLowering.cpp:9017
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7443
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::EVT::isFixedLengthVector
bool isFixedLengthVector() const
Definition: ValueTypes.h:165
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:814
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:55
llvm::TargetLoweringBase::getAsmOperandValueType
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Definition: TargetLowering.h:1399
llvm::AArch64_AM::isAdvSIMDModImmType11
static bool isAdvSIMDModImmType11(uint64_t Imm)
Definition: AArch64AddressingModes.h:655
llvm::AArch64ISD::FADD_PRED
@ FADD_PRED
Definition: AArch64ISelLowering.h:81
llvm::AArch64II::MO_GOT
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
Definition: AArch64BaseInfo.h:659
llvm::AArch64ISD::ST2post
@ ST2post
Definition: AArch64ISelLowering.h:416
llvm::codegen::getCodeModel
CodeModel::Model getCodeModel()
llvm::MachinePointerInfo::getStack
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition: MachineOperand.cpp:1016
llvm::TargetLoweringBase::setPrefLoopAlignment
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
Definition: TargetLowering.h:2301
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::ISD::ROTR
@ ROTR
Definition: ISDOpcodes.h:661
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value.
Definition: MachineMemOperand.h:220
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:521
llvm::AArch64Subtarget::Falkor
@ Falkor
Definition: AArch64Subtarget.h:65
llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:441
createGPRPairNode
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
Definition: AArch64ISelLowering.cpp:17143
Debug.h
llvm::EVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:140
llvm::ISD::isConstantSplatVectorAllOnes
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
Definition: SelectionDAG.cpp:171
llvm::ISD::SET_ROUNDING
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:830
llvm::TargetRegisterInfo::getCallPreservedMask
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
Definition: TargetRegisterInfo.h:485
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::AArch64ISD::LD1S_MERGE_ZERO
@ LD1S_MERGE_ZERO
Definition: AArch64ISelLowering.h:338
llvm::AArch64ISD::STP
@ STP
Definition: AArch64ISelLowering.h:443
llvm::AArch64Subtarget::isTargetAndroid
bool isTargetAndroid() const
Definition: AArch64Subtarget.h:504
llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition: SelectionDAGNodes.h:1334
llvm::CCValAssign::needsCustom
bool needsCustom() const
Definition: CallingConvLower.h:148
llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:583
llvm::SrcOp
Definition: MachineIRBuilder.h:119
llvm::AArch64ISD::FMIN_PRED
@ FMIN_PRED
Definition: AArch64ISelLowering.h:87
llvm::AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
For some targets, an LLVM struct type must be broken down into multiple simple types,...
Definition: AArch64ISelLowering.cpp:17549
llvm::IRBuilderBase::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2391
llvm::TargetLowering::getConstraintType
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Definition: TargetLowering.cpp:4448
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:669
canEmitConjunction
static bool canEmitConjunction(const SDValue Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations that can be expressed as a conjunction.
Definition: AArch64ISelLowering.cpp:2641
llvm::AArch64FunctionInfo::setTailCallReservedStack
void setTailCallReservedStack(unsigned bytes)
Definition: AArch64MachineFunctionInfo.h:190
llvm::SelectionDAG::getSetCC
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:1058
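A typical use (a sketch; DL, LHS and RHS are assumed to already be in scope):

  // Builds (setcc LHS, RHS, seteq) with an i1 result, wrapping the
  // ISD::CondCode in a condition-code node internally.
  SDValue IsEq = DAG.getSetCC(DL, MVT::i1, LHS, RHS, ISD::SETEQ);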
llvm::ISD::SADDSAT
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:327
llvm::TargetLoweringBase::LibCall
@ LibCall
Definition: TargetLowering.h:199
UINT64_MAX
#define UINT64_MAX
Definition: DataTypes.h:77
llvm::ISD::TokenFactor
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
llvm::AArch64FunctionInfo::setVarArgsGPRSize
void setVarArgsGPRSize(unsigned Size)
Definition: AArch64MachineFunctionInfo.h:314
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7192
llvm::AArch64ISD::GLDNT1_INDEX_MERGE_ZERO
@ GLDNT1_INDEX_MERGE_ZERO
Definition: AArch64ISelLowering.h:389
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1172
llvm::AArch64_AM::encodeAdvSIMDModImmType7
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
Definition: AArch64AddressingModes.h:548
llvm::AArch64II::MO_PAGE
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
Definition: AArch64BaseInfo.h:623
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::ISD::EXTRACT_ELEMENT
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:128
llvm::AArch64ISD::DUPLANE32
@ DUPLANE32
Definition: AArch64ISelLowering.h:155
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::ISD::MSCATTER
@ MSCATTER
Definition: ISDOpcodes.h:1173
llvm::AArch64ISD::FDIV_PRED
@ FDIV_PRED
Definition: AArch64ISelLowering.h:82
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:289
llvm::ISD::isBuildVectorAllOnes
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
Definition: SelectionDAG.cpp:262
isLegalArithImmed
static bool isLegalArithImmed(uint64_t C)
Definition: AArch64ISelLowering.cpp:2453
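AArch64 ADD/SUB/CMP immediates are 12-bit values, optionally shifted left by 12, which is what this predicate checks. For instance:

  assert(isLegalArithImmed(4095));     // 0xFFF fits the 12-bit field
  assert(isLegalArithImmed(0x5000));   // 0x5 << 12, the LSL #12 form
  assert(!isLegalArithImmed(0x1001));  // needs bits in both halves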
llvm::ISD::isBuildVectorAllZeros
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
Definition: SelectionDAG.cpp:266
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::TLSModel::LocalExec
@ LocalExec
Definition: CodeGen.h:46
llvm::MVT::getIntegerVT
static MVT getIntegerVT(unsigned BitWidth)
Definition: MachineValueType.h:1158
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::AArch64ISD::FROUND_MERGE_PASSTHRU
@ FROUND_MERGE_PASSTHRU
Definition: AArch64ISelLowering.h:116
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:773
llvm::AtomicRMWInst::Max
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:754
llvm::DataLayout::getTypeAllocSize
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:498
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:688
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition: SelectionDAG.cpp:1738
llvm::AArch64ISD::NVCAST
@ NVCAST
Natural vector cast.
Definition: AArch64ISelLowering.h:287
llvm::MVT::integer_fixedlen_vector_valuetypes
static auto integer_fixedlen_vector_valuetypes()
Definition: MachineValueType.h:1435
llvm::ISD::FDIV
@ FDIV
Definition: ISDOpcodes.h:380
llvm::LLT
Definition: LowLevelTypeImpl.h:40
llvm::AArch64ISD::SUBS
@ SUBS
Definition: AArch64ISelLowering.h:135